code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62
  63 import ganeti.masterd.instance # pylint: disable-msg=W0611
  64
  65
  66 class ResultWithJobs:
  67   """Data container for LU results with jobs.
  68
  69   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  70   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  71   contained in the C{jobs} attribute and include the job IDs in the opcode
  72   result.
  73
  74   """
  75   def __init__(self, jobs, **kwargs):
  76     """Initializes this class.
  77
  78     Additional return values can be specified as keyword arguments.
  79
  80     @type jobs: list of lists of L{opcode.OpCode}
  81     @param jobs: A list of lists of opcode objects
  82
  83     """
  84     self.jobs = jobs
  85     self.other = kwargs
  86
  87
  88 class LogicalUnit(object):
  89   """Logical Unit base class.
  90
  91   Subclasses must follow these rules:
  92     - implement ExpandNames
  93     - implement CheckPrereq (except when tasklets are used)
  94     - implement Exec (except when tasklets are used)
  95     - implement BuildHooksEnv
  96     - implement BuildHooksNodes
  97     - redefine HPATH and HTYPE
  98     - optionally redefine their run requirements:
  99         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 100
 101   Note that all commands require root permissions.
 102
 103   @ivar dry_run_result: the value (if any) that will be returned to the caller
 104       in dry-run mode (signalled by opcode dry_run parameter)
 105
 106   """
 107   HPATH = None
 108   HTYPE = None
 109   REQ_BGL = True
 110
 111   def __init__(self, processor, op, context, rpc):
 112     """Constructor for LogicalUnit.
 113
 114     This needs to be overridden in derived classes in order to check op
 115     validity.
 116
 117     """
 118     self.proc = processor
 119     self.op = op
 120     self.cfg = context.cfg
 121     self.glm = context.glm
 122     # readability alias
 123     self.owned_locks = context.glm.list_owned
 124     self.context = context
 125     self.rpc = rpc
 126     # Dicts used to declare locking needs to mcpu
 127     self.needed_locks = None
 128     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 129     self.add_locks = {}
 130     self.remove_locks = {}
 131     # Used to force good behavior when calling helper functions
 132     self.recalculate_locks = {}
 133     # logging
 134     self.Log = processor.Log # pylint: disable-msg=C0103
 135     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 136     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 137     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 138     # support for dry-run
 139     self.dry_run_result = None
 140     # support for generic debug attribute
 141     if (not hasattr(self.op, "debug_level") or
 142         not isinstance(self.op.debug_level, int)):
 143       self.op.debug_level = 0
 144
 145     # Tasklets
 146     self.tasklets = None
 147
 148     # Validate opcode parameters and set defaults
 149     self.op.Validate(True)
 150
 151     self.CheckArguments()
 152
 153   def CheckArguments(self):
 154     """Check syntactic validity for the opcode arguments.
 155
 156     This method is for doing a simple syntactic check and ensure
 157     validity of opcode parameters, without any cluster-related
 158     checks. While the same can be accomplished in ExpandNames and/or
 159     CheckPrereq, doing these separate is better because:
 160
 161       - ExpandNames is left as as purely a lock-related function
 162       - CheckPrereq is run after we have acquired locks (and possible
 163         waited for them)
 164
 165     The function is allowed to change the self.op attribute so that
 166     later methods can no longer worry about missing parameters.
 167
 168     """
 169     pass
 170
 171   def ExpandNames(self):
 172     """Expand names for this LU.
 173
 174     This method is called before starting to execute the opcode, and it should
 175     update all the parameters of the opcode to their canonical form (e.g. a
 176     short node name must be fully expanded after this method has successfully
 177     completed). This way locking, hooks, logging, etc. can work correctly.
 178
 179     LUs which implement this method must also populate the self.needed_locks
 180     member, as a dict with lock levels as keys, and a list of needed lock names
 181     as values. Rules:
 182
 183       - use an empty dict if you don't need any lock
 184       - if you don't need any lock at a particular level omit that level
 185       - don't put anything for the BGL level
 186       - if you want all locks at a level use locking.ALL_SET as a value
 187
 188     If you need to share locks (rather than acquire them exclusively) at one
 189     level you can modify self.share_locks, setting a true value (usually 1) for
 190     that level. By default locks are not shared.
 191
 192     This function can also define a list of tasklets, which then will be
 193     executed in order instead of the usual LU-level CheckPrereq and Exec
 194     functions, if those are not defined by the LU.
 195
 196     Examples::
 197
 198       # Acquire all nodes and one instance
 199       self.needed_locks = {
 200         locking.LEVEL_NODE: locking.ALL_SET,
 201         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 202       }
 203       # Acquire just two nodes
 204       self.needed_locks = {
 205         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 206       }
 207       # Acquire no locks
 208       self.needed_locks = {} # No, you can't leave it to the default value None
 209
 210     """
 211     # The implementation of this method is mandatory only if the new LU is
 212     # concurrent, so that old LUs don't need to be changed all at the same
 213     # time.
 214     if self.REQ_BGL:
 215       self.needed_locks = {} # Exclusive LUs don't need locks.
 216     else:
 217       raise NotImplementedError
 218
 219   def DeclareLocks(self, level):
 220     """Declare LU locking needs for a level
 221
 222     While most LUs can just declare their locking needs at ExpandNames time,
 223     sometimes there's the need to calculate some locks after having acquired
 224     the ones before. This function is called just before acquiring locks at a
 225     particular level, but after acquiring the ones at lower levels, and permits
 226     such calculations. It can be used to modify self.needed_locks, and by
 227     default it does nothing.
 228
 229     This function is only called if you have something already set in
 230     self.needed_locks for the level.
 231
 232     @param level: Locking level which is going to be locked
 233     @type level: member of ganeti.locking.LEVELS
 234
 235     """
 236
 237   def CheckPrereq(self):
 238     """Check prerequisites for this LU.
 239
 240     This method should check that the prerequisites for the execution
 241     of this LU are fulfilled. It can do internode communication, but
 242     it should be idempotent - no cluster or system changes are
 243     allowed.
 244
 245     The method should raise errors.OpPrereqError in case something is
 246     not fulfilled. Its return value is ignored.
 247
 248     This method should also update all the parameters of the opcode to
 249     their canonical form if it hasn't been done by ExpandNames before.
 250
 251     """
 252     if self.tasklets is not None:
 253       for (idx, tl) in enumerate(self.tasklets):
 254         logging.debug("Checking prerequisites for tasklet %s/%s",
 255                       idx + 1, len(self.tasklets))
 256         tl.CheckPrereq()
 257     else:
 258       pass
 259
 260   def Exec(self, feedback_fn):
 261     """Execute the LU.
 262
 263     This method should implement the actual work. It should raise
 264     errors.OpExecError for failures that are somewhat dealt with in
 265     code, or expected.
 266
 267     """
 268     if self.tasklets is not None:
 269       for (idx, tl) in enumerate(self.tasklets):
 270         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 271         tl.Exec(feedback_fn)
 272     else:
 273       raise NotImplementedError
 274
 275   def BuildHooksEnv(self):
 276     """Build hooks environment for this LU.
 277
 278     @rtype: dict
 279     @return: Dictionary containing the environment that will be used for
 280       running the hooks for this LU. The keys of the dict must not be prefixed
 281       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 282       will extend the environment with additional variables. If no environment
 283       should be defined, an empty dictionary should be returned (not C{None}).
 284     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 285       will not be called.
 286
 287     """
 288     raise NotImplementedError
 289
 290   def BuildHooksNodes(self):
 291     """Build list of nodes to run LU's hooks.
 292
 293     @rtype: tuple; (list, list)
 294     @return: Tuple containing a list of node names on which the hook
 295       should run before the execution and a list of node names on which the
 296       hook should run after the execution. No nodes should be returned as an
 297       empty list (and not None).
 298     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 299       will not be called.
 300
 301     """
 302     raise NotImplementedError
 303
 304   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 305     """Notify the LU about the results of its hooks.
 306
 307     This method is called every time a hooks phase is executed, and notifies
 308     the Logical Unit about the hooks' result. The LU can then use it to alter
 309     its result based on the hooks.  By default the method does nothing and the
 310     previous result is passed back unchanged but any LU can define it if it
 311     wants to use the local cluster hook-scripts somehow.
 312
 313     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 314         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 315     @param hook_results: the results of the multi-node hooks rpc call
 316     @param feedback_fn: function used send feedback back to the caller
 317     @param lu_result: the previous Exec result this LU had, or None
 318         in the PRE phase
 319     @return: the new Exec result, based on the previous result
 320         and hook results
 321
 322     """
 323     # API must be kept, thus we ignore the unused argument and could
 324     # be a function warnings
 325     # pylint: disable-msg=W0613,R0201
 326     return lu_result
 327
 328   def _ExpandAndLockInstance(self):
 329     """Helper function to expand and lock an instance.
 330
 331     Many LUs that work on an instance take its name in self.op.instance_name
 332     and need to expand it and then declare the expanded name for locking. This
 333     function does it, and then updates self.op.instance_name to the expanded
 334     name. It also initializes needed_locks as a dict, if this hasn't been done
 335     before.
 336
 337     """
 338     if self.needed_locks is None:
 339       self.needed_locks = {}
 340     else:
 341       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 342         "_ExpandAndLockInstance called with instance-level locks set"
 343     self.op.instance_name = _ExpandInstanceName(self.cfg,
 344                                                 self.op.instance_name)
 345     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 346
 347   def _LockInstancesNodes(self, primary_only=False):
 348     """Helper function to declare instances' nodes for locking.
 349
 350     This function should be called after locking one or more instances to lock
 351     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 352     with all primary or secondary nodes for instances already locked and
 353     present in self.needed_locks[locking.LEVEL_INSTANCE].
 354
 355     It should be called from DeclareLocks, and for safety only works if
 356     self.recalculate_locks[locking.LEVEL_NODE] is set.
 357
 358     In the future it may grow parameters to just lock some instance's nodes, or
 359     to just lock primaries or secondary nodes, if needed.
 360
 361     If should be called in DeclareLocks in a way similar to::
 362
 363       if level == locking.LEVEL_NODE:
 364         self._LockInstancesNodes()
 365
 366     @type primary_only: boolean
 367     @param primary_only: only lock primary nodes of locked instances
 368
 369     """
 370     assert locking.LEVEL_NODE in self.recalculate_locks, \
 371       "_LockInstancesNodes helper function called with no nodes to recalculate"
 372
 373     # TODO: check if we're really been called with the instance locks held
 374
 375     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 376     # future we might want to have different behaviors depending on the value
 377     # of self.recalculate_locks[locking.LEVEL_NODE]
 378     wanted_nodes = []
 379     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 380     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 381       wanted_nodes.append(instance.primary_node)
 382       if not primary_only:
 383         wanted_nodes.extend(instance.secondary_nodes)
 384
 385     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 386       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 387     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 388       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 389
 390     del self.recalculate_locks[locking.LEVEL_NODE]
 391
 392
 393 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 394   """Simple LU which runs no hooks.
 395
 396   This LU is intended as a parent for other LogicalUnits which will
 397   run no hooks, in order to reduce duplicate code.
 398
 399   """
 400   HPATH = None
 401   HTYPE = None
 402
 403   def BuildHooksEnv(self):
 404     """Empty BuildHooksEnv for NoHooksLu.
 405
 406     This just raises an error.
 407
 408     """
 409     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 410
 411   def BuildHooksNodes(self):
 412     """Empty BuildHooksNodes for NoHooksLU.
 413
 414     """
 415     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 416
 417
 418 class Tasklet:
 419   """Tasklet base class.
 420
 421   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 422   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 423   tasklets know nothing about locks.
 424
 425   Subclasses must follow these rules:
 426     - Implement CheckPrereq
 427     - Implement Exec
 428
 429   """
 430   def __init__(self, lu):
 431     self.lu = lu
 432
 433     # Shortcuts
 434     self.cfg = lu.cfg
 435     self.rpc = lu.rpc
 436
 437   def CheckPrereq(self):
 438     """Check prerequisites for this tasklets.
 439
 440     This method should check whether the prerequisites for the execution of
 441     this tasklet are fulfilled. It can do internode communication, but it
 442     should be idempotent - no cluster or system changes are allowed.
 443
 444     The method should raise errors.OpPrereqError in case something is not
 445     fulfilled. Its return value is ignored.
 446
 447     This method should also update all parameters to their canonical form if it
 448     hasn't been done before.
 449
 450     """
 451     pass
 452
 453   def Exec(self, feedback_fn):
 454     """Execute the tasklet.
 455
 456     This method should implement the actual work. It should raise
 457     errors.OpExecError for failures that are somewhat dealt with in code, or
 458     expected.
 459
 460     """
 461     raise NotImplementedError
 462
 463
 464 class _QueryBase:
 465   """Base for query utility classes.
 466
 467   """
 468   #: Attribute holding field definitions
 469   FIELDS = None
 470
 471   def __init__(self, filter_, fields, use_locking):
 472     """Initializes this class.
 473
 474     """
 475     self.use_locking = use_locking
 476
 477     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 478                              namefield="name")
 479     self.requested_data = self.query.RequestedData()
 480     self.names = self.query.RequestedNames()
 481
 482     # Sort only if no names were requested
 483     self.sort_by_name = not self.names
 484
 485     self.do_locking = None
 486     self.wanted = None
 487
 488   def _GetNames(self, lu, all_names, lock_level):
 489     """Helper function to determine names asked for in the query.
 490
 491     """
 492     if self.do_locking:
 493       names = lu.owned_locks(lock_level)
 494     else:
 495       names = all_names
 496
 497     if self.wanted == locking.ALL_SET:
 498       assert not self.names
 499       # caller didn't specify names, so ordering is not important
 500       return utils.NiceSort(names)
 501
 502     # caller specified names and we must keep the same order
 503     assert self.names
 504     assert not self.do_locking or lu.glm.is_owned(lock_level)
 505
 506     missing = set(self.wanted).difference(names)
 507     if missing:
 508       raise errors.OpExecError("Some items were removed before retrieving"
 509                                " their data: %s" % missing)
 510
 511     # Return expanded names
 512     return self.wanted
 513
 514   def ExpandNames(self, lu):
 515     """Expand names for this query.
 516
 517     See L{LogicalUnit.ExpandNames}.
 518
 519     """
 520     raise NotImplementedError()
 521
 522   def DeclareLocks(self, lu, level):
 523     """Declare locks for this query.
 524
 525     See L{LogicalUnit.DeclareLocks}.
 526
 527     """
 528     raise NotImplementedError()
 529
 530   def _GetQueryData(self, lu):
 531     """Collects all data for this query.
 532
 533     @return: Query data object
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def NewStyleQuery(self, lu):
 539     """Collect data and execute query.
 540
 541     """
 542     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 543                                   sort_by_name=self.sort_by_name)
 544
 545   def OldStyleQuery(self, lu):
 546     """Collect data and execute query.
 547
 548     """
 549     return self.query.OldStyleQuery(self._GetQueryData(lu),
 550                                     sort_by_name=self.sort_by_name)
 551
 552
 553 def _ShareAll():
 554   """Returns a dict declaring all lock levels shared.
 555
 556   """
 557   return dict.fromkeys(locking.LEVELS, 1)
 558
 559
 560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 561   """Checks if the owned node groups are still correct for an instance.
 562
 563   @type cfg: L{config.ConfigWriter}
 564   @param cfg: The cluster configuration
 565   @type instance_name: string
 566   @param instance_name: Instance name
 567   @type owned_groups: set or frozenset
 568   @param owned_groups: List of currently owned node groups
 569
 570   """
 571   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 572
 573   if not owned_groups.issuperset(inst_groups):
 574     raise errors.OpPrereqError("Instance %s's node groups changed since"
 575                                " locks were acquired, current groups are"
 576                                " are '%s', owning groups '%s'; retry the"
 577                                " operation" %
 578                                (instance_name,
 579                                 utils.CommaJoin(inst_groups),
 580                                 utils.CommaJoin(owned_groups)),
 581                                errors.ECODE_STATE)
 582
 583   return inst_groups
 584
 585
 586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 587   """Checks if the instances in a node group are still correct.
 588
 589   @type cfg: L{config.ConfigWriter}
 590   @param cfg: The cluster configuration
 591   @type group_uuid: string
 592   @param group_uuid: Node group UUID
 593   @type owned_instances: set or frozenset
 594   @param owned_instances: List of currently owned instances
 595
 596   """
 597   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 598   if owned_instances != wanted_instances:
 599     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 600                                " locks were acquired, wanted '%s', have '%s';"
 601                                " retry the operation" %
 602                                (group_uuid,
 603                                 utils.CommaJoin(wanted_instances),
 604                                 utils.CommaJoin(owned_instances)),
 605                                errors.ECODE_STATE)
 606
 607   return wanted_instances
 608
 609
 610 def _SupportsOob(cfg, node):
 611   """Tells if node supports OOB.
 612
 613   @type cfg: L{config.ConfigWriter}
 614   @param cfg: The cluster configuration
 615   @type node: L{objects.Node}
 616   @param node: The node
 617   @return: The OOB script if supported or an empty string otherwise
 618
 619   """
 620   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 621
 622
 623 def _GetWantedNodes(lu, nodes):
 624   """Returns list of checked and expanded node names.
 625
 626   @type lu: L{LogicalUnit}
 627   @param lu: the logical unit on whose behalf we execute
 628   @type nodes: list
 629   @param nodes: list of node names or None for all nodes
 630   @rtype: list
 631   @return: the list of nodes, sorted
 632   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 633
 634   """
 635   if nodes:
 636     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 637
 638   return utils.NiceSort(lu.cfg.GetNodeList())
 639
 640
 641 def _GetWantedInstances(lu, instances):
 642   """Returns list of checked and expanded instance names.
 643
 644   @type lu: L{LogicalUnit}
 645   @param lu: the logical unit on whose behalf we execute
 646   @type instances: list
 647   @param instances: list of instance names or None for all instances
 648   @rtype: list
 649   @return: the list of instances, sorted
 650   @raise errors.OpPrereqError: if the instances parameter is wrong type
 651   @raise errors.OpPrereqError: if any of the passed instances is not found
 652
 653   """
 654   if instances:
 655     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 656   else:
 657     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 658   return wanted
 659
 660
 661 def _GetUpdatedParams(old_params, update_dict,
 662                       use_default=True, use_none=False):
 663   """Return the new version of a parameter dictionary.
 664
 665   @type old_params: dict
 666   @param old_params: old parameters
 667   @type update_dict: dict
 668   @param update_dict: dict containing new parameter values, or
 669       constants.VALUE_DEFAULT to reset the parameter to its default
 670       value
 671   @param use_default: boolean
 672   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 673       values as 'to be deleted' values
 674   @param use_none: boolean
 675   @type use_none: whether to recognise C{None} values as 'to be
 676       deleted' values
 677   @rtype: dict
 678   @return: the new parameter dictionary
 679
 680   """
 681   params_copy = copy.deepcopy(old_params)
 682   for key, val in update_dict.iteritems():
 683     if ((use_default and val == constants.VALUE_DEFAULT) or
 684         (use_none and val is None)):
 685       try:
 686         del params_copy[key]
 687       except KeyError:
 688         pass
 689     else:
 690       params_copy[key] = val
 691   return params_copy
 692
 693
 694 def _ReleaseLocks(lu, level, names=None, keep=None):
 695   """Releases locks owned by an LU.
 696
 697   @type lu: L{LogicalUnit}
 698   @param level: Lock level
 699   @type names: list or None
 700   @param names: Names of locks to release
 701   @type keep: list or None
 702   @param keep: Names of locks to retain
 703
 704   """
 705   assert not (keep is not None and names is not None), \
 706          "Only one of the 'names' and the 'keep' parameters can be given"
 707
 708   if names is not None:
 709     should_release = names.__contains__
 710   elif keep:
 711     should_release = lambda name: name not in keep
 712   else:
 713     should_release = None
 714
 715   if should_release:
 716     retain = []
 717     release = []
 718
 719     # Determine which locks to release
 720     for name in lu.owned_locks(level):
 721       if should_release(name):
 722         release.append(name)
 723       else:
 724         retain.append(name)
 725
 726     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 727
 728     # Release just some locks
 729     lu.glm.release(level, names=release)
 730
 731     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 732   else:
 733     # Release everything
 734     lu.glm.release(level)
 735
 736     assert not lu.glm.is_owned(level), "No locks should be owned"
 737
 738
 739 def _MapInstanceDisksToNodes(instances):
 740   """Creates a map from (node, volume) to instance name.
 741
 742   @type instances: list of L{objects.Instance}
 743   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 744
 745   """
 746   return dict(((node, vol), inst.name)
 747               for inst in instances
 748               for (node, vols) in inst.MapLVsByNode().items()
 749               for vol in vols)
 750
 751
 752 def _RunPostHook(lu, node_name):
 753   """Runs the post-hook for an opcode on a single node.
 754
 755   """
 756   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 757   try:
 758     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 759   except:
 760     # pylint: disable-msg=W0702
 761     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 762
 763
 764 def _CheckOutputFields(static, dynamic, selected):
 765   """Checks whether all selected fields are valid.
 766
 767   @type static: L{utils.FieldSet}
 768   @param static: static fields set
 769   @type dynamic: L{utils.FieldSet}
 770   @param dynamic: dynamic fields set
 771
 772   """
 773   f = utils.FieldSet()
 774   f.Extend(static)
 775   f.Extend(dynamic)
 776
 777   delta = f.NonMatching(selected)
 778   if delta:
 779     raise errors.OpPrereqError("Unknown output fields selected: %s"
 780                                % ",".join(delta), errors.ECODE_INVAL)
 781
 782
 783 def _CheckGlobalHvParams(params):
 784   """Validates that given hypervisor params are not global ones.
 785
 786   This will ensure that instances don't get customised versions of
 787   global params.
 788
 789   """
 790   used_globals = constants.HVC_GLOBALS.intersection(params)
 791   if used_globals:
 792     msg = ("The following hypervisor parameters are global and cannot"
 793            " be customized at instance level, please modify them at"
 794            " cluster level: %s" % utils.CommaJoin(used_globals))
 795     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 796
 797
 798 def _CheckNodeOnline(lu, node, msg=None):
 799   """Ensure that a given node is online.
 800
 801   @param lu: the LU on behalf of which we make the check
 802   @param node: the node to check
 803   @param msg: if passed, should be a message to replace the default one
 804   @raise errors.OpPrereqError: if the node is offline
 805
 806   """
 807   if msg is None:
 808     msg = "Can't use offline node"
 809   if lu.cfg.GetNodeInfo(node).offline:
 810     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 811
 812
 813 def _CheckNodeNotDrained(lu, node):
 814   """Ensure that a given node is not drained.
 815
 816   @param lu: the LU on behalf of which we make the check
 817   @param node: the node to check
 818   @raise errors.OpPrereqError: if the node is drained
 819
 820   """
 821   if lu.cfg.GetNodeInfo(node).drained:
 822     raise errors.OpPrereqError("Can't use drained node %s" % node,
 823                                errors.ECODE_STATE)
 824
 825
 826 def _CheckNodeVmCapable(lu, node):
 827   """Ensure that a given node is vm capable.
 828
 829   @param lu: the LU on behalf of which we make the check
 830   @param node: the node to check
 831   @raise errors.OpPrereqError: if the node is not vm capable
 832
 833   """
 834   if not lu.cfg.GetNodeInfo(node).vm_capable:
 835     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 836                                errors.ECODE_STATE)
 837
 838
 839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 840   """Ensure that a node supports a given OS.
 841
 842   @param lu: the LU on behalf of which we make the check
 843   @param node: the node to check
 844   @param os_name: the OS to query about
 845   @param force_variant: whether to ignore variant errors
 846   @raise errors.OpPrereqError: if the node is not supporting the OS
 847
 848   """
 849   result = lu.rpc.call_os_get(node, os_name)
 850   result.Raise("OS '%s' not in supported OS list for node %s" %
 851                (os_name, node),
 852                prereq=True, ecode=errors.ECODE_INVAL)
 853   if not force_variant:
 854     _CheckOSVariant(result.payload, os_name)
 855
 856
 857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 858   """Ensure that a node has the given secondary ip.
 859
 860   @type lu: L{LogicalUnit}
 861   @param lu: the LU on behalf of which we make the check
 862   @type node: string
 863   @param node: the node to check
 864   @type secondary_ip: string
 865   @param secondary_ip: the ip to check
 866   @type prereq: boolean
 867   @param prereq: whether to throw a prerequisite or an execute error
 868   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 869   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 870
 871   """
 872   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 873   result.Raise("Failure checking secondary ip on node %s" % node,
 874                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 875   if not result.payload:
 876     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 877            " please fix and re-run this command" % secondary_ip)
 878     if prereq:
 879       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 880     else:
 881       raise errors.OpExecError(msg)
 882
 883
 884 def _GetClusterDomainSecret():
 885   """Reads the cluster domain secret.
 886
 887   """
 888   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 889                                strict=True)
 890
 891
 892 def _CheckInstanceDown(lu, instance, reason):
 893   """Ensure that an instance is not running."""
 894   if instance.admin_up:
 895     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 896                                (instance.name, reason), errors.ECODE_STATE)
 897
 898   pnode = instance.primary_node
 899   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 900   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 901               prereq=True, ecode=errors.ECODE_ENVIRON)
 902
 903   if instance.name in ins_l.payload:
 904     raise errors.OpPrereqError("Instance %s is running, %s" %
 905                                (instance.name, reason), errors.ECODE_STATE)
 906
 907
 908 def _ExpandItemName(fn, name, kind):
 909   """Expand an item name.
 910
 911   @param fn: the function to use for expansion
 912   @param name: requested item name
 913   @param kind: text description ('Node' or 'Instance')
 914   @return: the resolved (full) name
 915   @raise errors.OpPrereqError: if the item is not found
 916
 917   """
 918   full_name = fn(name)
 919   if full_name is None:
 920     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 921                                errors.ECODE_NOENT)
 922   return full_name
 923
 924
 925 def _ExpandNodeName(cfg, name):
 926   """Wrapper over L{_ExpandItemName} for nodes."""
 927   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 928
 929
 930 def _ExpandInstanceName(cfg, name):
 931   """Wrapper over L{_ExpandItemName} for instance."""
 932   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 933
 934
 935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 936                           memory, vcpus, nics, disk_template, disks,
 937                           bep, hvp, hypervisor_name, tags):
 938   """Builds instance related env variables for hooks
 939
 940   This builds the hook environment from individual variables.
 941
 942   @type name: string
 943   @param name: the name of the instance
 944   @type primary_node: string
 945   @param primary_node: the name of the instance's primary node
 946   @type secondary_nodes: list
 947   @param secondary_nodes: list of secondary nodes as strings
 948   @type os_type: string
 949   @param os_type: the name of the instance's OS
 950   @type status: boolean
 951   @param status: the should_run status of the instance
 952   @type memory: string
 953   @param memory: the memory size of the instance
 954   @type vcpus: string
 955   @param vcpus: the count of VCPUs the instance has
 956   @type nics: list
 957   @param nics: list of tuples (ip, mac, mode, link) representing
 958       the NICs the instance has
 959   @type disk_template: string
 960   @param disk_template: the disk template of the instance
 961   @type disks: list
 962   @param disks: the list of (size, mode) pairs
 963   @type bep: dict
 964   @param bep: the backend parameters for the instance
 965   @type hvp: dict
 966   @param hvp: the hypervisor parameters for the instance
 967   @type hypervisor_name: string
 968   @param hypervisor_name: the hypervisor for the instance
 969   @type tags: list
 970   @param tags: list of instance tags as strings
 971   @rtype: dict
 972   @return: the hook environment for this instance
 973
 974   """
 975   if status:
 976     str_status = "up"
 977   else:
 978     str_status = "down"
 979   env = {
 980     "OP_TARGET": name,
 981     "INSTANCE_NAME": name,
 982     "INSTANCE_PRIMARY": primary_node,
 983     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 984     "INSTANCE_OS_TYPE": os_type,
 985     "INSTANCE_STATUS": str_status,
 986     "INSTANCE_MEMORY": memory,
 987     "INSTANCE_VCPUS": vcpus,
 988     "INSTANCE_DISK_TEMPLATE": disk_template,
 989     "INSTANCE_HYPERVISOR": hypervisor_name,
 990   }
 991
 992   if nics:
 993     nic_count = len(nics)
 994     for idx, (ip, mac, mode, link) in enumerate(nics):
 995       if ip is None:
 996         ip = ""
 997       env["INSTANCE_NIC%d_IP" % idx] = ip
 998       env["INSTANCE_NIC%d_MAC" % idx] = mac
 999       env["INSTANCE_NIC%d_MODE" % idx] = mode
1000       env["INSTANCE_NIC%d_LINK" % idx] = link
1001       if mode == constants.NIC_MODE_BRIDGED:
1002         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003   else:
1004     nic_count = 0
1005
1006   env["INSTANCE_NIC_COUNT"] = nic_count
1007
1008   if disks:
1009     disk_count = len(disks)
1010     for idx, (size, mode) in enumerate(disks):
1011       env["INSTANCE_DISK%d_SIZE" % idx] = size
1012       env["INSTANCE_DISK%d_MODE" % idx] = mode
1013   else:
1014     disk_count = 0
1015
1016   env["INSTANCE_DISK_COUNT"] = disk_count
1017
1018   if not tags:
1019     tags = []
1020
1021   env["INSTANCE_TAGS"] = " ".join(tags)
1022
1023   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024     for key, value in source.items():
1025       env["INSTANCE_%s_%s" % (kind, key)] = value
1026
1027   return env
1028
1029
1030 def _NICListToTuple(lu, nics):
1031   """Build a list of nic information tuples.
1032
1033   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034   value in LUInstanceQueryData.
1035
1036   @type lu:  L{LogicalUnit}
1037   @param lu: the logical unit on whose behalf we execute
1038   @type nics: list of L{objects.NIC}
1039   @param nics: list of nics to convert to hooks tuples
1040
1041   """
1042   hooks_nics = []
1043   cluster = lu.cfg.GetClusterInfo()
1044   for nic in nics:
1045     ip = nic.ip
1046     mac = nic.mac
1047     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048     mode = filled_params[constants.NIC_MODE]
1049     link = filled_params[constants.NIC_LINK]
1050     hooks_nics.append((ip, mac, mode, link))
1051   return hooks_nics
1052
1053
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055   """Builds instance related env variables for hooks from an object.
1056
1057   @type lu: L{LogicalUnit}
1058   @param lu: the logical unit on whose behalf we execute
1059   @type instance: L{objects.Instance}
1060   @param instance: the instance for which we should build the
1061       environment
1062   @type override: dict
1063   @param override: dictionary with key/values that will override
1064       our values
1065   @rtype: dict
1066   @return: the hook environment dictionary
1067
1068   """
1069   cluster = lu.cfg.GetClusterInfo()
1070   bep = cluster.FillBE(instance)
1071   hvp = cluster.FillHV(instance)
1072   args = {
1073     "name": instance.name,
1074     "primary_node": instance.primary_node,
1075     "secondary_nodes": instance.secondary_nodes,
1076     "os_type": instance.os,
1077     "status": instance.admin_up,
1078     "memory": bep[constants.BE_MEMORY],
1079     "vcpus": bep[constants.BE_VCPUS],
1080     "nics": _NICListToTuple(lu, instance.nics),
1081     "disk_template": instance.disk_template,
1082     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083     "bep": bep,
1084     "hvp": hvp,
1085     "hypervisor_name": instance.hypervisor,
1086     "tags": instance.tags,
1087   }
1088   if override:
1089     args.update(override)
1090   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1091
1092
1093 def _AdjustCandidatePool(lu, exceptions):
1094   """Adjust the candidate pool after node operations.
1095
1096   """
1097   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098   if mod_list:
1099     lu.LogInfo("Promoted nodes to master candidate role: %s",
1100                utils.CommaJoin(node.name for node in mod_list))
1101     for name in mod_list:
1102       lu.context.ReaddNode(name)
1103   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104   if mc_now > mc_max:
1105     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106                (mc_now, mc_max))
1107
1108
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110   """Decide whether I should promote myself as a master candidate.
1111
1112   """
1113   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_max with one, so:
1116   mc_should = min(mc_should + 1, cp_size)
1117   return mc_now < mc_should
1118
1119
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121   """Check that the brigdes needed by a list of nics exist.
1122
1123   """
1124   cluster = lu.cfg.GetClusterInfo()
1125   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126   brlist = [params[constants.NIC_LINK] for params in paramslist
1127             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128   if brlist:
1129     result = lu.rpc.call_bridges_exist(target_node, brlist)
1130     result.Raise("Error checking bridges on destination node '%s'" %
1131                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132
1133
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135   """Check that the brigdes needed by an instance exist.
1136
1137   """
1138   if node is None:
1139     node = instance.primary_node
1140   _CheckNicsBridgesExist(lu, instance.nics, node)
1141
1142
1143 def _CheckOSVariant(os_obj, name):
1144   """Check whether an OS name conforms to the os variants specification.
1145
1146   @type os_obj: L{objects.OS}
1147   @param os_obj: OS object to check
1148   @type name: string
1149   @param name: OS name passed by the user, to check for validity
1150
1151   """
1152   variant = objects.OS.GetVariant(name)
1153   if not os_obj.supported_variants:
1154     if variant:
1155       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156                                  " passed)" % (os_obj.name, variant),
1157                                  errors.ECODE_INVAL)
1158     return
1159   if not variant:
1160     raise errors.OpPrereqError("OS name must include a variant",
1161                                errors.ECODE_INVAL)
1162
1163   if variant not in os_obj.supported_variants:
1164     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165
1166
1167 def _GetNodeInstancesInner(cfg, fn):
1168   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169
1170
1171 def _GetNodeInstances(cfg, node_name):
1172   """Returns a list of all primary and secondary instances on a node.
1173
1174   """
1175
1176   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177
1178
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180   """Returns primary instances on a node.
1181
1182   """
1183   return _GetNodeInstancesInner(cfg,
1184                                 lambda inst: node_name == inst.primary_node)
1185
1186
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188   """Returns secondary instances on a node.
1189
1190   """
1191   return _GetNodeInstancesInner(cfg,
1192                                 lambda inst: node_name in inst.secondary_nodes)
1193
1194
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196   """Returns the arguments for a storage type.
1197
1198   """
1199   # Special case for file storage
1200   if storage_type == constants.ST_FILE:
1201     # storage.FileStorage wants a list of storage directories
1202     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203
1204   return []
1205
1206
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208   faulty = []
1209
1210   for dev in instance.disks:
1211     cfg.SetDiskID(dev, node_name)
1212
1213   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214   result.Raise("Failed to get disk status from node %s" % node_name,
1215                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216
1217   for idx, bdev_status in enumerate(result.payload):
1218     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219       faulty.append(idx)
1220
1221   return faulty
1222
1223
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225   """Check the sanity of iallocator and node arguments and use the
1226   cluster-wide iallocator if appropriate.
1227
1228   Check that at most one of (iallocator, node) is specified. If none is
1229   specified, then the LU's opcode's iallocator slot is filled with the
1230   cluster-wide default iallocator.
1231
1232   @type iallocator_slot: string
1233   @param iallocator_slot: the name of the opcode iallocator slot
1234   @type node_slot: string
1235   @param node_slot: the name of the opcode target node slot
1236
1237   """
1238   node = getattr(lu.op, node_slot, None)
1239   iallocator = getattr(lu.op, iallocator_slot, None)
1240
1241   if node is not None and iallocator is not None:
1242     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243                                errors.ECODE_INVAL)
1244   elif node is None and iallocator is None:
1245     default_iallocator = lu.cfg.GetDefaultIAllocator()
1246     if default_iallocator:
1247       setattr(lu.op, iallocator_slot, default_iallocator)
1248     else:
1249       raise errors.OpPrereqError("No iallocator or node given and no"
1250                                  " cluster-wide default iallocator found;"
1251                                  " please specify either an iallocator or a"
1252                                  " node, or set a cluster-wide default"
1253                                  " iallocator")
1254
1255
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257   """Decides on which iallocator to use.
1258
1259   @type cfg: L{config.ConfigWriter}
1260   @param cfg: Cluster configuration object
1261   @type iallocator: string or None
1262   @param iallocator: Iallocator specified in opcode
1263   @rtype: string
1264   @return: Iallocator name
1265
1266   """
1267   if not iallocator:
1268     # Use default iallocator
1269     iallocator = cfg.GetDefaultIAllocator()
1270
1271   if not iallocator:
1272     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273                                " opcode nor as a cluster-wide default",
1274                                errors.ECODE_INVAL)
1275
1276   return iallocator
1277
1278
1279 class LUClusterPostInit(LogicalUnit):
1280   """Logical unit for running hooks after cluster initialization.
1281
1282   """
1283   HPATH = "cluster-init"
1284   HTYPE = constants.HTYPE_CLUSTER
1285
1286   def BuildHooksEnv(self):
1287     """Build hooks env.
1288
1289     """
1290     return {
1291       "OP_TARGET": self.cfg.GetClusterName(),
1292       }
1293
1294   def BuildHooksNodes(self):
1295     """Build hooks nodes.
1296
1297     """
1298     return ([], [self.cfg.GetMasterNode()])
1299
1300   def Exec(self, feedback_fn):
1301     """Nothing to do.
1302
1303     """
1304     return True
1305
1306
1307 class LUClusterDestroy(LogicalUnit):
1308   """Logical unit for destroying the cluster.
1309
1310   """
1311   HPATH = "cluster-destroy"
1312   HTYPE = constants.HTYPE_CLUSTER
1313
1314   def BuildHooksEnv(self):
1315     """Build hooks env.
1316
1317     """
1318     return {
1319       "OP_TARGET": self.cfg.GetClusterName(),
1320       }
1321
1322   def BuildHooksNodes(self):
1323     """Build hooks nodes.
1324
1325     """
1326     return ([], [])
1327
1328   def CheckPrereq(self):
1329     """Check prerequisites.
1330
1331     This checks whether the cluster is empty.
1332
1333     Any errors are signaled by raising errors.OpPrereqError.
1334
1335     """
1336     master = self.cfg.GetMasterNode()
1337
1338     nodelist = self.cfg.GetNodeList()
1339     if len(nodelist) != 1 or nodelist[0] != master:
1340       raise errors.OpPrereqError("There are still %d node(s) in"
1341                                  " this cluster." % (len(nodelist) - 1),
1342                                  errors.ECODE_INVAL)
1343     instancelist = self.cfg.GetInstanceList()
1344     if instancelist:
1345       raise errors.OpPrereqError("There are still %d instance(s) in"
1346                                  " this cluster." % len(instancelist),
1347                                  errors.ECODE_INVAL)
1348
1349   def Exec(self, feedback_fn):
1350     """Destroys the cluster.
1351
1352     """
1353     master = self.cfg.GetMasterNode()
1354
1355     # Run post hooks on master node before it's removed
1356     _RunPostHook(self, master)
1357
1358     result = self.rpc.call_node_stop_master(master, False)
1359     result.Raise("Could not disable the master role")
1360
1361     return master
1362
1363
1364 def _VerifyCertificate(filename):
1365   """Verifies a certificate for L{LUClusterVerifyConfig}.
1366
1367   @type filename: string
1368   @param filename: Path to PEM file
1369
1370   """
1371   try:
1372     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373                                            utils.ReadFile(filename))
1374   except Exception, err: # pylint: disable-msg=W0703
1375     return (LUClusterVerifyConfig.ETYPE_ERROR,
1376             "Failed to load X509 certificate %s: %s" % (filename, err))
1377
1378   (errcode, msg) = \
1379     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380                                 constants.SSL_CERT_EXPIRATION_ERROR)
1381
1382   if msg:
1383     fnamemsg = "While verifying %s: %s" % (filename, msg)
1384   else:
1385     fnamemsg = None
1386
1387   if errcode is None:
1388     return (None, fnamemsg)
1389   elif errcode == utils.CERT_WARNING:
1390     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391   elif errcode == utils.CERT_ERROR:
1392     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393
1394   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395
1396
1397 def _GetAllHypervisorParameters(cluster, instances):
1398   """Compute the set of all hypervisor parameters.
1399
1400   @type cluster: L{objects.Cluster}
1401   @param cluster: the cluster object
1402   @param instances: list of L{objects.Instance}
1403   @param instances: additional instances from which to obtain parameters
1404   @rtype: list of (origin, hypervisor, parameters)
1405   @return: a list with all parameters found, indicating the hypervisor they
1406        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407
1408   """
1409   hvp_data = []
1410
1411   for hv_name in cluster.enabled_hypervisors:
1412     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413
1414   for os_name, os_hvp in cluster.os_hvp.items():
1415     for hv_name, hv_params in os_hvp.items():
1416       if hv_params:
1417         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419
1420   # TODO: collapse identical parameter values in a single one
1421   for instance in instances:
1422     if instance.hvparams:
1423       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424                        cluster.FillHV(instance)))
1425
1426   return hvp_data
1427
1428
1429 class _VerifyErrors(object):
1430   """Mix-in for cluster/group verify LUs.
1431
1432   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433   self.op and self._feedback_fn to be available.)
1434
1435   """
1436   TCLUSTER = "cluster"
1437   TNODE = "node"
1438   TINSTANCE = "instance"
1439
1440   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443   ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444   ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452   ENODEDRBD = (TNODE, "ENODEDRBD")
1453   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456   ENODEHV = (TNODE, "ENODEHV")
1457   ENODELVM = (TNODE, "ENODELVM")
1458   ENODEN1 = (TNODE, "ENODEN1")
1459   ENODENET = (TNODE, "ENODENET")
1460   ENODEOS = (TNODE, "ENODEOS")
1461   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463   ENODERPC = (TNODE, "ENODERPC")
1464   ENODESSH = (TNODE, "ENODESSH")
1465   ENODEVERSION = (TNODE, "ENODEVERSION")
1466   ENODESETUP = (TNODE, "ENODESETUP")
1467   ENODETIME = (TNODE, "ENODETIME")
1468   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469
1470   ETYPE_FIELD = "code"
1471   ETYPE_ERROR = "ERROR"
1472   ETYPE_WARNING = "WARNING"
1473
1474   def _Error(self, ecode, item, msg, *args, **kwargs):
1475     """Format an error message.
1476
1477     Based on the opcode's error_codes parameter, either format a
1478     parseable error code, or a simpler error string.
1479
1480     This must be called only from Exec and functions called from Exec.
1481
1482     """
1483     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484     itype, etxt = ecode
1485     # first complete the msg
1486     if args:
1487       msg = msg % args
1488     # then format the whole message
1489     if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1490       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491     else:
1492       if item:
1493         item = " " + item
1494       else:
1495         item = ""
1496       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497     # and finally report it via the feedback_fn
1498     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1499
1500   def _ErrorIf(self, cond, *args, **kwargs):
1501     """Log an error message if the passed condition is True.
1502
1503     """
1504     cond = (bool(cond)
1505             or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1506     if cond:
1507       self._Error(*args, **kwargs)
1508     # do not mark the operation as failed for WARN cases only
1509     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510       self.bad = self.bad or cond
1511
1512
1513 class LUClusterVerify(NoHooksLU):
1514   """Submits all jobs necessary to verify the cluster.
1515
1516   """
1517   REQ_BGL = False
1518
1519   def ExpandNames(self):
1520     self.needed_locks = {}
1521
1522   def Exec(self, feedback_fn):
1523     jobs = []
1524
1525     if self.op.group_name:
1526       groups = [self.op.group_name]
1527       depends_fn = lambda: None
1528     else:
1529       groups = self.cfg.GetNodeGroupList()
1530
1531       # Verify global configuration
1532       jobs.append([opcodes.OpClusterVerifyConfig()])
1533
1534       # Always depend on global verification
1535       depends_fn = lambda: [(-len(jobs), [])]
1536
1537     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538                                               depends=depends_fn())]
1539                 for group in groups)
1540
1541     # Fix up all parameters
1542     for op in itertools.chain(*jobs): # pylint: disable-msg=W0142
1543       op.debug_simulate_errors = self.op.debug_simulate_errors
1544       op.verbose = self.op.verbose
1545       op.error_codes = self.op.error_codes
1546       try:
1547         op.skip_checks = self.op.skip_checks
1548       except AttributeError:
1549         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1550
1551     return ResultWithJobs(jobs)
1552
1553
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555   """Verifies the cluster config.
1556
1557   """
1558   REQ_BGL = True
1559
1560   def _VerifyHVP(self, hvp_data):
1561     """Verifies locally the syntax of the hypervisor parameters.
1562
1563     """
1564     for item, hv_name, hv_params in hvp_data:
1565       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566              (item, hv_name))
1567       try:
1568         hv_class = hypervisor.GetHypervisor(hv_name)
1569         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570         hv_class.CheckParameterSyntax(hv_params)
1571       except errors.GenericError, err:
1572         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573
1574   def ExpandNames(self):
1575     # Information can be safely retrieved as the BGL is acquired in exclusive
1576     # mode
1577     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579     self.all_node_info = self.cfg.GetAllNodesInfo()
1580     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581     self.needed_locks = {}
1582
1583   def Exec(self, feedback_fn):
1584     """Verify integrity of cluster, performing various test on nodes.
1585
1586     """
1587     self.bad = False
1588     self._feedback_fn = feedback_fn
1589
1590     feedback_fn("* Verifying cluster config")
1591
1592     for msg in self.cfg.VerifyConfig():
1593       self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1594
1595     feedback_fn("* Verifying cluster certificate files")
1596
1597     for cert_filename in constants.ALL_CERT_FILES:
1598       (errcode, msg) = _VerifyCertificate(cert_filename)
1599       self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1600
1601     feedback_fn("* Verifying hypervisor parameters")
1602
1603     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604                                                 self.all_inst_info.values()))
1605
1606     feedback_fn("* Verifying all nodes belong to an existing group")
1607
1608     # We do this verification here because, should this bogus circumstance
1609     # occur, it would never be caught by VerifyGroup, which only acts on
1610     # nodes/instances reachable from existing node groups.
1611
1612     dangling_nodes = set(node.name for node in self.all_node_info.values()
1613                          if node.group not in self.all_group_info)
1614
1615     dangling_instances = {}
1616     no_node_instances = []
1617
1618     for inst in self.all_inst_info.values():
1619       if inst.primary_node in dangling_nodes:
1620         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621       elif inst.primary_node not in self.all_node_info:
1622         no_node_instances.append(inst.name)
1623
1624     pretty_dangling = [
1625         "%s (%s)" %
1626         (node.name,
1627          utils.CommaJoin(dangling_instances.get(node.name,
1628                                                 ["no instances"])))
1629         for node in dangling_nodes]
1630
1631     self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632                   "the following nodes (and their instances) belong to a non"
1633                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1634
1635     self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636                   "the following instances have a non-existing primary-node:"
1637                   " %s", utils.CommaJoin(no_node_instances))
1638
1639     return not self.bad
1640
1641
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643   """Verifies the status of a node group.
1644
1645   """
1646   HPATH = "cluster-verify"
1647   HTYPE = constants.HTYPE_CLUSTER
1648   REQ_BGL = False
1649
1650   _HOOKS_INDENT_RE = re.compile("^", re.M)
1651
1652   class NodeImage(object):
1653     """A class representing the logical and physical status of a node.
1654
1655     @type name: string
1656     @ivar name: the node name to which this object refers
1657     @ivar volumes: a structure as returned from
1658         L{ganeti.backend.GetVolumeList} (runtime)
1659     @ivar instances: a list of running instances (runtime)
1660     @ivar pinst: list of configured primary instances (config)
1661     @ivar sinst: list of configured secondary instances (config)
1662     @ivar sbp: dictionary of {primary-node: list of instances} for all
1663         instances for which this node is secondary (config)
1664     @ivar mfree: free memory, as reported by hypervisor (runtime)
1665     @ivar dfree: free disk, as reported by the node (runtime)
1666     @ivar offline: the offline status (config)
1667     @type rpc_fail: boolean
1668     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1669         not whether the individual keys were correct) (runtime)
1670     @type lvm_fail: boolean
1671     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672     @type hyp_fail: boolean
1673     @ivar hyp_fail: whether the RPC call didn't return the instance list
1674     @type ghost: boolean
1675     @ivar ghost: whether this is a known node or not (config)
1676     @type os_fail: boolean
1677     @ivar os_fail: whether the RPC call didn't return valid OS data
1678     @type oslist: list
1679     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680     @type vm_capable: boolean
1681     @ivar vm_capable: whether the node can host instances
1682
1683     """
1684     def __init__(self, offline=False, name=None, vm_capable=True):
1685       self.name = name
1686       self.volumes = {}
1687       self.instances = []
1688       self.pinst = []
1689       self.sinst = []
1690       self.sbp = {}
1691       self.mfree = 0
1692       self.dfree = 0
1693       self.offline = offline
1694       self.vm_capable = vm_capable
1695       self.rpc_fail = False
1696       self.lvm_fail = False
1697       self.hyp_fail = False
1698       self.ghost = False
1699       self.os_fail = False
1700       self.oslist = {}
1701
1702   def ExpandNames(self):
1703     # This raises errors.OpPrereqError on its own:
1704     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1705
1706     # Get instances in node group; this is unsafe and needs verification later
1707     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1708
1709     self.needed_locks = {
1710       locking.LEVEL_INSTANCE: inst_names,
1711       locking.LEVEL_NODEGROUP: [self.group_uuid],
1712       locking.LEVEL_NODE: [],
1713       }
1714
1715     self.share_locks = _ShareAll()
1716
1717   def DeclareLocks(self, level):
1718     if level == locking.LEVEL_NODE:
1719       # Get members of node group; this is unsafe and needs verification later
1720       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1721
1722       all_inst_info = self.cfg.GetAllInstancesInfo()
1723
1724       # In Exec(), we warn about mirrored instances that have primary and
1725       # secondary living in separate node groups. To fully verify that
1726       # volumes for these instances are healthy, we will need to do an
1727       # extra call to their secondaries. We ensure here those nodes will
1728       # be locked.
1729       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730         # Important: access only the instances whose lock is owned
1731         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732           nodes.update(all_inst_info[inst].secondary_nodes)
1733
1734       self.needed_locks[locking.LEVEL_NODE] = nodes
1735
1736   def CheckPrereq(self):
1737     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1739
1740     group_nodes = set(self.group_info.members)
1741     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1742
1743     unlocked_nodes = \
1744         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1745
1746     unlocked_instances = \
1747         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1748
1749     if unlocked_nodes:
1750       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751                                  utils.CommaJoin(unlocked_nodes))
1752
1753     if unlocked_instances:
1754       raise errors.OpPrereqError("Missing lock for instances: %s" %
1755                                  utils.CommaJoin(unlocked_instances))
1756
1757     self.all_node_info = self.cfg.GetAllNodesInfo()
1758     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1759
1760     self.my_node_names = utils.NiceSort(group_nodes)
1761     self.my_inst_names = utils.NiceSort(group_instances)
1762
1763     self.my_node_info = dict((name, self.all_node_info[name])
1764                              for name in self.my_node_names)
1765
1766     self.my_inst_info = dict((name, self.all_inst_info[name])
1767                              for name in self.my_inst_names)
1768
1769     # We detect here the nodes that will need the extra RPC calls for verifying
1770     # split LV volumes; they should be locked.
1771     extra_lv_nodes = set()
1772
1773     for inst in self.my_inst_info.values():
1774       if inst.disk_template in constants.DTS_INT_MIRROR:
1775         group = self.my_node_info[inst.primary_node].group
1776         for nname in inst.secondary_nodes:
1777           if self.all_node_info[nname].group != group:
1778             extra_lv_nodes.add(nname)
1779
1780     unlocked_lv_nodes = \
1781         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1782
1783     if unlocked_lv_nodes:
1784       raise errors.OpPrereqError("these nodes could be locked: %s" %
1785                                  utils.CommaJoin(unlocked_lv_nodes))
1786     self.extra_lv_nodes = list(extra_lv_nodes)
1787
1788   def _VerifyNode(self, ninfo, nresult):
1789     """Perform some basic validation on data returned from a node.
1790
1791       - check the result data structure is well formed and has all the
1792         mandatory fields
1793       - check ganeti version
1794
1795     @type ninfo: L{objects.Node}
1796     @param ninfo: the node to check
1797     @param nresult: the results from the node
1798     @rtype: boolean
1799     @return: whether overall this call was successful (and we can expect
1800          reasonable values in the respose)
1801
1802     """
1803     node = ninfo.name
1804     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1805
1806     # main result, nresult should be a non-empty dict
1807     test = not nresult or not isinstance(nresult, dict)
1808     _ErrorIf(test, self.ENODERPC, node,
1809                   "unable to verify node: no data returned")
1810     if test:
1811       return False
1812
1813     # compares ganeti version
1814     local_version = constants.PROTOCOL_VERSION
1815     remote_version = nresult.get("version", None)
1816     test = not (remote_version and
1817                 isinstance(remote_version, (list, tuple)) and
1818                 len(remote_version) == 2)
1819     _ErrorIf(test, self.ENODERPC, node,
1820              "connection to node returned invalid data")
1821     if test:
1822       return False
1823
1824     test = local_version != remote_version[0]
1825     _ErrorIf(test, self.ENODEVERSION, node,
1826              "incompatible protocol versions: master %s,"
1827              " node %s", local_version, remote_version[0])
1828     if test:
1829       return False
1830
1831     # node seems compatible, we can actually try to look into its results
1832
1833     # full package version
1834     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835                   self.ENODEVERSION, node,
1836                   "software version mismatch: master %s, node %s",
1837                   constants.RELEASE_VERSION, remote_version[1],
1838                   code=self.ETYPE_WARNING)
1839
1840     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841     if ninfo.vm_capable and isinstance(hyp_result, dict):
1842       for hv_name, hv_result in hyp_result.iteritems():
1843         test = hv_result is not None
1844         _ErrorIf(test, self.ENODEHV, node,
1845                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1846
1847     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848     if ninfo.vm_capable and isinstance(hvp_result, list):
1849       for item, hv_name, hv_result in hvp_result:
1850         _ErrorIf(True, self.ENODEHV, node,
1851                  "hypervisor %s parameter verify failure (source %s): %s",
1852                  hv_name, item, hv_result)
1853
1854     test = nresult.get(constants.NV_NODESETUP,
1855                        ["Missing NODESETUP results"])
1856     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1857              "; ".join(test))
1858
1859     return True
1860
1861   def _VerifyNodeTime(self, ninfo, nresult,
1862                       nvinfo_starttime, nvinfo_endtime):
1863     """Check the node time.
1864
1865     @type ninfo: L{objects.Node}
1866     @param ninfo: the node to check
1867     @param nresult: the remote results for the node
1868     @param nvinfo_starttime: the start time of the RPC call
1869     @param nvinfo_endtime: the end time of the RPC call
1870
1871     """
1872     node = ninfo.name
1873     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1874
1875     ntime = nresult.get(constants.NV_TIME, None)
1876     try:
1877       ntime_merged = utils.MergeTime(ntime)
1878     except (ValueError, TypeError):
1879       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1880       return
1881
1882     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1886     else:
1887       ntime_diff = None
1888
1889     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890              "Node time diverges by at least %s from master node time",
1891              ntime_diff)
1892
1893   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894     """Check the node LVM results.
1895
1896     @type ninfo: L{objects.Node}
1897     @param ninfo: the node to check
1898     @param nresult: the remote results for the node
1899     @param vg_name: the configured VG name
1900
1901     """
1902     if vg_name is None:
1903       return
1904
1905     node = ninfo.name
1906     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1907
1908     # checks vg existence and size > 20G
1909     vglist = nresult.get(constants.NV_VGLIST, None)
1910     test = not vglist
1911     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1912     if not test:
1913       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914                                             constants.MIN_VG_SIZE)
1915       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1916
1917     # check pv names
1918     pvlist = nresult.get(constants.NV_PVLIST, None)
1919     test = pvlist is None
1920     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1921     if not test:
1922       # check that ':' is not present in PV names, since it's a
1923       # special character for lvcreate (denotes the range of PEs to
1924       # use on the PV)
1925       for _, pvname, owner_vg in pvlist:
1926         test = ":" in pvname
1927         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928                  " '%s' of VG '%s'", pvname, owner_vg)
1929
1930   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931     """Check the node bridges.
1932
1933     @type ninfo: L{objects.Node}
1934     @param ninfo: the node to check
1935     @param nresult: the remote results for the node
1936     @param bridges: the expected list of bridges
1937
1938     """
1939     if not bridges:
1940       return
1941
1942     node = ninfo.name
1943     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1944
1945     missing = nresult.get(constants.NV_BRIDGES, None)
1946     test = not isinstance(missing, list)
1947     _ErrorIf(test, self.ENODENET, node,
1948              "did not return valid bridge information")
1949     if not test:
1950       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951                utils.CommaJoin(sorted(missing)))
1952
1953   def _VerifyNodeNetwork(self, ninfo, nresult):
1954     """Check the node network connectivity results.
1955
1956     @type ninfo: L{objects.Node}
1957     @param ninfo: the node to check
1958     @param nresult: the remote results for the node
1959
1960     """
1961     node = ninfo.name
1962     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1963
1964     test = constants.NV_NODELIST not in nresult
1965     _ErrorIf(test, self.ENODESSH, node,
1966              "node hasn't returned node ssh connectivity data")
1967     if not test:
1968       if nresult[constants.NV_NODELIST]:
1969         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970           _ErrorIf(True, self.ENODESSH, node,
1971                    "ssh communication with node '%s': %s", a_node, a_msg)
1972
1973     test = constants.NV_NODENETTEST not in nresult
1974     _ErrorIf(test, self.ENODENET, node,
1975              "node hasn't returned node tcp connectivity data")
1976     if not test:
1977       if nresult[constants.NV_NODENETTEST]:
1978         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979         for anode in nlist:
1980           _ErrorIf(True, self.ENODENET, node,
1981                    "tcp communication with node '%s': %s",
1982                    anode, nresult[constants.NV_NODENETTEST][anode])
1983
1984     test = constants.NV_MASTERIP not in nresult
1985     _ErrorIf(test, self.ENODENET, node,
1986              "node hasn't returned node master IP reachability data")
1987     if not test:
1988       if not nresult[constants.NV_MASTERIP]:
1989         if node == self.master_node:
1990           msg = "the master node cannot reach the master IP (not configured?)"
1991         else:
1992           msg = "cannot reach the master IP"
1993         _ErrorIf(True, self.ENODENET, node, msg)
1994
1995   def _VerifyInstance(self, instance, instanceconfig, node_image,
1996                       diskstatus):
1997     """Verify an instance.
1998
1999     This function checks to see if the required block devices are
2000     available on the instance's node.
2001
2002     """
2003     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2004     node_current = instanceconfig.primary_node
2005
2006     node_vol_should = {}
2007     instanceconfig.MapLVsByNode(node_vol_should)
2008
2009     for node in node_vol_should:
2010       n_img = node_image[node]
2011       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012         # ignore missing volumes on offline or broken nodes
2013         continue
2014       for volume in node_vol_should[node]:
2015         test = volume not in n_img.volumes
2016         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017                  "volume %s missing on node %s", volume, node)
2018
2019     if instanceconfig.admin_up:
2020       pri_img = node_image[node_current]
2021       test = instance not in pri_img.instances and not pri_img.offline
2022       _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023                "instance not running on its primary node %s",
2024                node_current)
2025
2026     diskdata = [(nname, success, status, idx)
2027                 for (nname, disks) in diskstatus.items()
2028                 for idx, (success, status) in enumerate(disks)]
2029
2030     for nname, success, bdev_status, idx in diskdata:
2031       # the 'ghost node' construction in Exec() ensures that we have a
2032       # node here
2033       snode = node_image[nname]
2034       bad_snode = snode.ghost or snode.offline
2035       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036                self.EINSTANCEFAULTYDISK, instance,
2037                "couldn't retrieve status for disk/%s on %s: %s",
2038                idx, nname, bdev_status)
2039       _ErrorIf((instanceconfig.admin_up and success and
2040                 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041                self.EINSTANCEFAULTYDISK, instance,
2042                "disk/%s on %s is faulty", idx, nname)
2043
2044   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045     """Verify if there are any unknown volumes in the cluster.
2046
2047     The .os, .swap and backup volumes are ignored. All other volumes are
2048     reported as unknown.
2049
2050     @type reserved: L{ganeti.utils.FieldSet}
2051     @param reserved: a FieldSet of reserved volume names
2052
2053     """
2054     for node, n_img in node_image.items():
2055       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056         # skip non-healthy nodes
2057         continue
2058       for volume in n_img.volumes:
2059         test = ((node not in node_vol_should or
2060                 volume not in node_vol_should[node]) and
2061                 not reserved.Matches(volume))
2062         self._ErrorIf(test, self.ENODEORPHANLV, node,
2063                       "volume %s is unknown", volume)
2064
2065   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066     """Verify N+1 Memory Resilience.
2067
2068     Check that if one single node dies we can still start all the
2069     instances it was primary for.
2070
2071     """
2072     cluster_info = self.cfg.GetClusterInfo()
2073     for node, n_img in node_image.items():
2074       # This code checks that every node which is now listed as
2075       # secondary has enough memory to host all instances it is
2076       # supposed to should a single other node in the cluster fail.
2077       # FIXME: not ready for failover to an arbitrary node
2078       # FIXME: does not support file-backed instances
2079       # WARNING: we currently take into account down instances as well
2080       # as up ones, considering that even if they're down someone
2081       # might want to start them even in the event of a node failure.
2082       if n_img.offline:
2083         # we're skipping offline nodes from the N+1 warning, since
2084         # most likely we don't have good memory infromation from them;
2085         # we already list instances living on such nodes, and that's
2086         # enough warning
2087         continue
2088       for prinode, instances in n_img.sbp.items():
2089         needed_mem = 0
2090         for instance in instances:
2091           bep = cluster_info.FillBE(instance_cfg[instance])
2092           if bep[constants.BE_AUTO_BALANCE]:
2093             needed_mem += bep[constants.BE_MEMORY]
2094         test = n_img.mfree < needed_mem
2095         self._ErrorIf(test, self.ENODEN1, node,
2096                       "not enough memory to accomodate instance failovers"
2097                       " should node %s fail (%dMiB needed, %dMiB available)",
2098                       prinode, needed_mem, n_img.mfree)
2099
2100   @classmethod
2101   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102                    (files_all, files_all_opt, files_mc, files_vm)):
2103     """Verifies file checksums collected from all nodes.
2104
2105     @param errorif: Callback for reporting errors
2106     @param nodeinfo: List of L{objects.Node} objects
2107     @param master_node: Name of master node
2108     @param all_nvinfo: RPC results
2109
2110     """
2111     node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2112
2113     assert master_node in node_names
2114     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2115             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2116            "Found file listed in more than one file list"
2117
2118     # Define functions determining which nodes to consider for a file
2119     file2nodefn = dict([(filename, fn)
2120       for (files, fn) in [(files_all, None),
2121                           (files_all_opt, None),
2122                           (files_mc, lambda node: (node.master_candidate or
2123                                                    node.name == master_node)),
2124                           (files_vm, lambda node: node.vm_capable)]
2125       for filename in files])
2126
2127     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2128
2129     for node in nodeinfo:
2130       if node.offline:
2131         continue
2132
2133       nresult = all_nvinfo[node.name]
2134
2135       if nresult.fail_msg or not nresult.payload:
2136         node_files = None
2137       else:
2138         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2139
2140       test = not (node_files and isinstance(node_files, dict))
2141       errorif(test, cls.ENODEFILECHECK, node.name,
2142               "Node did not return file checksum data")
2143       if test:
2144         continue
2145
2146       for (filename, checksum) in node_files.items():
2147         # Check if the file should be considered for a node
2148         fn = file2nodefn[filename]
2149         if fn is None or fn(node):
2150           fileinfo[filename].setdefault(checksum, set()).add(node.name)
2151
2152     for (filename, checksums) in fileinfo.items():
2153       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2154
2155       # Nodes having the file
2156       with_file = frozenset(node_name
2157                             for nodes in fileinfo[filename].values()
2158                             for node_name in nodes)
2159
2160       # Nodes missing file
2161       missing_file = node_names - with_file
2162
2163       if filename in files_all_opt:
2164         # All or no nodes
2165         errorif(missing_file and missing_file != node_names,
2166                 cls.ECLUSTERFILECHECK, None,
2167                 "File %s is optional, but it must exist on all or no"
2168                 " nodes (not found on %s)",
2169                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2170       else:
2171         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2172                 "File %s is missing from node(s) %s", filename,
2173                 utils.CommaJoin(utils.NiceSort(missing_file)))
2174
2175       # See if there are multiple versions of the file
2176       test = len(checksums) > 1
2177       if test:
2178         variants = ["variant %s on %s" %
2179                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2180                     for (idx, (checksum, nodes)) in
2181                       enumerate(sorted(checksums.items()))]
2182       else:
2183         variants = []
2184
2185       errorif(test, cls.ECLUSTERFILECHECK, None,
2186               "File %s found with %s different checksums (%s)",
2187               filename, len(checksums), "; ".join(variants))
2188
2189   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2190                       drbd_map):
2191     """Verifies and the node DRBD status.
2192
2193     @type ninfo: L{objects.Node}
2194     @param ninfo: the node to check
2195     @param nresult: the remote results for the node
2196     @param instanceinfo: the dict of instances
2197     @param drbd_helper: the configured DRBD usermode helper
2198     @param drbd_map: the DRBD map as returned by
2199         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2200
2201     """
2202     node = ninfo.name
2203     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2204
2205     if drbd_helper:
2206       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2207       test = (helper_result == None)
2208       _ErrorIf(test, self.ENODEDRBDHELPER, node,
2209                "no drbd usermode helper returned")
2210       if helper_result:
2211         status, payload = helper_result
2212         test = not status
2213         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2214                  "drbd usermode helper check unsuccessful: %s", payload)
2215         test = status and (payload != drbd_helper)
2216         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2217                  "wrong drbd usermode helper: %s", payload)
2218
2219     # compute the DRBD minors
2220     node_drbd = {}
2221     for minor, instance in drbd_map[node].items():
2222       test = instance not in instanceinfo
2223       _ErrorIf(test, self.ECLUSTERCFG, None,
2224                "ghost instance '%s' in temporary DRBD map", instance)
2225         # ghost instance should not be running, but otherwise we
2226         # don't give double warnings (both ghost instance and
2227         # unallocated minor in use)
2228       if test:
2229         node_drbd[minor] = (instance, False)
2230       else:
2231         instance = instanceinfo[instance]
2232         node_drbd[minor] = (instance.name, instance.admin_up)
2233
2234     # and now check them
2235     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2236     test = not isinstance(used_minors, (tuple, list))
2237     _ErrorIf(test, self.ENODEDRBD, node,
2238              "cannot parse drbd status file: %s", str(used_minors))
2239     if test:
2240       # we cannot check drbd status
2241       return
2242
2243     for minor, (iname, must_exist) in node_drbd.items():
2244       test = minor not in used_minors and must_exist
2245       _ErrorIf(test, self.ENODEDRBD, node,
2246                "drbd minor %d of instance %s is not active", minor, iname)
2247     for minor in used_minors:
2248       test = minor not in node_drbd
2249       _ErrorIf(test, self.ENODEDRBD, node,
2250                "unallocated drbd minor %d is in use", minor)
2251
2252   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2253     """Builds the node OS structures.
2254
2255     @type ninfo: L{objects.Node}
2256     @param ninfo: the node to check
2257     @param nresult: the remote results for the node
2258     @param nimg: the node image object
2259
2260     """
2261     node = ninfo.name
2262     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2263
2264     remote_os = nresult.get(constants.NV_OSLIST, None)
2265     test = (not isinstance(remote_os, list) or
2266             not compat.all(isinstance(v, list) and len(v) == 7
2267                            for v in remote_os))
2268
2269     _ErrorIf(test, self.ENODEOS, node,
2270              "node hasn't returned valid OS data")
2271
2272     nimg.os_fail = test
2273
2274     if test:
2275       return
2276
2277     os_dict = {}
2278
2279     for (name, os_path, status, diagnose,
2280          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2281
2282       if name not in os_dict:
2283         os_dict[name] = []
2284
2285       # parameters is a list of lists instead of list of tuples due to
2286       # JSON lacking a real tuple type, fix it:
2287       parameters = [tuple(v) for v in parameters]
2288       os_dict[name].append((os_path, status, diagnose,
2289                             set(variants), set(parameters), set(api_ver)))
2290
2291     nimg.oslist = os_dict
2292
2293   def _VerifyNodeOS(self, ninfo, nimg, base):
2294     """Verifies the node OS list.
2295
2296     @type ninfo: L{objects.Node}
2297     @param ninfo: the node to check
2298     @param nimg: the node image object
2299     @param base: the 'template' node we match against (e.g. from the master)
2300
2301     """
2302     node = ninfo.name
2303     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2304
2305     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2306
2307     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2308     for os_name, os_data in nimg.oslist.items():
2309       assert os_data, "Empty OS status for OS %s?!" % os_name
2310       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2311       _ErrorIf(not f_status, self.ENODEOS, node,
2312                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2313       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2314                "OS '%s' has multiple entries (first one shadows the rest): %s",
2315                os_name, utils.CommaJoin([v[0] for v in os_data]))
2316       # comparisons with the 'base' image
2317       test = os_name not in base.oslist
2318       _ErrorIf(test, self.ENODEOS, node,
2319                "Extra OS %s not present on reference node (%s)",
2320                os_name, base.name)
2321       if test:
2322         continue
2323       assert base.oslist[os_name], "Base node has empty OS status?"
2324       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2325       if not b_status:
2326         # base OS is invalid, skipping
2327         continue
2328       for kind, a, b in [("API version", f_api, b_api),
2329                          ("variants list", f_var, b_var),
2330                          ("parameters", beautify_params(f_param),
2331                           beautify_params(b_param))]:
2332         _ErrorIf(a != b, self.ENODEOS, node,
2333                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2334                  kind, os_name, base.name,
2335                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2336
2337     # check any missing OSes
2338     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2339     _ErrorIf(missing, self.ENODEOS, node,
2340              "OSes present on reference node %s but missing on this node: %s",
2341              base.name, utils.CommaJoin(missing))
2342
2343   def _VerifyOob(self, ninfo, nresult):
2344     """Verifies out of band functionality of a node.
2345
2346     @type ninfo: L{objects.Node}
2347     @param ninfo: the node to check
2348     @param nresult: the remote results for the node
2349
2350     """
2351     node = ninfo.name
2352     # We just have to verify the paths on master and/or master candidates
2353     # as the oob helper is invoked on the master
2354     if ((ninfo.master_candidate or ninfo.master_capable) and
2355         constants.NV_OOB_PATHS in nresult):
2356       for path_result in nresult[constants.NV_OOB_PATHS]:
2357         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2358
2359   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2360     """Verifies and updates the node volume data.
2361
2362     This function will update a L{NodeImage}'s internal structures
2363     with data from the remote call.
2364
2365     @type ninfo: L{objects.Node}
2366     @param ninfo: the node to check
2367     @param nresult: the remote results for the node
2368     @param nimg: the node image object
2369     @param vg_name: the configured VG name
2370
2371     """
2372     node = ninfo.name
2373     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2374
2375     nimg.lvm_fail = True
2376     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2377     if vg_name is None:
2378       pass
2379     elif isinstance(lvdata, basestring):
2380       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2381                utils.SafeEncode(lvdata))
2382     elif not isinstance(lvdata, dict):
2383       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2384     else:
2385       nimg.volumes = lvdata
2386       nimg.lvm_fail = False
2387
2388   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2389     """Verifies and updates the node instance list.
2390
2391     If the listing was successful, then updates this node's instance
2392     list. Otherwise, it marks the RPC call as failed for the instance
2393     list key.
2394
2395     @type ninfo: L{objects.Node}
2396     @param ninfo: the node to check
2397     @param nresult: the remote results for the node
2398     @param nimg: the node image object
2399
2400     """
2401     idata = nresult.get(constants.NV_INSTANCELIST, None)
2402     test = not isinstance(idata, list)
2403     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2404                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2405     if test:
2406       nimg.hyp_fail = True
2407     else:
2408       nimg.instances = idata
2409
2410   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2411     """Verifies and computes a node information map
2412
2413     @type ninfo: L{objects.Node}
2414     @param ninfo: the node to check
2415     @param nresult: the remote results for the node
2416     @param nimg: the node image object
2417     @param vg_name: the configured VG name
2418
2419     """
2420     node = ninfo.name
2421     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2422
2423     # try to read free memory (from the hypervisor)
2424     hv_info = nresult.get(constants.NV_HVINFO, None)
2425     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2426     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2427     if not test:
2428       try:
2429         nimg.mfree = int(hv_info["memory_free"])
2430       except (ValueError, TypeError):
2431         _ErrorIf(True, self.ENODERPC, node,
2432                  "node returned invalid nodeinfo, check hypervisor")
2433
2434     # FIXME: devise a free space model for file based instances as well
2435     if vg_name is not None:
2436       test = (constants.NV_VGLIST not in nresult or
2437               vg_name not in nresult[constants.NV_VGLIST])
2438       _ErrorIf(test, self.ENODELVM, node,
2439                "node didn't return data for the volume group '%s'"
2440                " - it is either missing or broken", vg_name)
2441       if not test:
2442         try:
2443           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2444         except (ValueError, TypeError):
2445           _ErrorIf(True, self.ENODERPC, node,
2446                    "node returned invalid LVM info, check LVM status")
2447
2448   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2449     """Gets per-disk status information for all instances.
2450
2451     @type nodelist: list of strings
2452     @param nodelist: Node names
2453     @type node_image: dict of (name, L{objects.Node})
2454     @param node_image: Node objects
2455     @type instanceinfo: dict of (name, L{objects.Instance})
2456     @param instanceinfo: Instance objects
2457     @rtype: {instance: {node: [(succes, payload)]}}
2458     @return: a dictionary of per-instance dictionaries with nodes as
2459         keys and disk information as values; the disk information is a
2460         list of tuples (success, payload)
2461
2462     """
2463     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2464
2465     node_disks = {}
2466     node_disks_devonly = {}
2467     diskless_instances = set()
2468     diskless = constants.DT_DISKLESS
2469
2470     for nname in nodelist:
2471       node_instances = list(itertools.chain(node_image[nname].pinst,
2472                                             node_image[nname].sinst))
2473       diskless_instances.update(inst for inst in node_instances
2474                                 if instanceinfo[inst].disk_template == diskless)
2475       disks = [(inst, disk)
2476                for inst in node_instances
2477                for disk in instanceinfo[inst].disks]
2478
2479       if not disks:
2480         # No need to collect data
2481         continue
2482
2483       node_disks[nname] = disks
2484
2485       # Creating copies as SetDiskID below will modify the objects and that can
2486       # lead to incorrect data returned from nodes
2487       devonly = [dev.Copy() for (_, dev) in disks]
2488
2489       for dev in devonly:
2490         self.cfg.SetDiskID(dev, nname)
2491
2492       node_disks_devonly[nname] = devonly
2493
2494     assert len(node_disks) == len(node_disks_devonly)
2495
2496     # Collect data from all nodes with disks
2497     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2498                                                           node_disks_devonly)
2499
2500     assert len(result) == len(node_disks)
2501
2502     instdisk = {}
2503
2504     for (nname, nres) in result.items():
2505       disks = node_disks[nname]
2506
2507       if nres.offline:
2508         # No data from this node
2509         data = len(disks) * [(False, "node offline")]
2510       else:
2511         msg = nres.fail_msg
2512         _ErrorIf(msg, self.ENODERPC, nname,
2513                  "while getting disk information: %s", msg)
2514         if msg:
2515           # No data from this node
2516           data = len(disks) * [(False, msg)]
2517         else:
2518           data = []
2519           for idx, i in enumerate(nres.payload):
2520             if isinstance(i, (tuple, list)) and len(i) == 2:
2521               data.append(i)
2522             else:
2523               logging.warning("Invalid result from node %s, entry %d: %s",
2524                               nname, idx, i)
2525               data.append((False, "Invalid result from the remote node"))
2526
2527       for ((inst, _), status) in zip(disks, data):
2528         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2529
2530     # Add empty entries for diskless instances.
2531     for inst in diskless_instances:
2532       assert inst not in instdisk
2533       instdisk[inst] = {}
2534
2535     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2536                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2537                       compat.all(isinstance(s, (tuple, list)) and
2538                                  len(s) == 2 for s in statuses)
2539                       for inst, nnames in instdisk.items()
2540                       for nname, statuses in nnames.items())
2541     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2542
2543     return instdisk
2544
2545   def BuildHooksEnv(self):
2546     """Build hooks env.
2547
2548     Cluster-Verify hooks just ran in the post phase and their failure makes
2549     the output be logged in the verify output and the verification to fail.
2550
2551     """
2552     env = {
2553       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2554       }
2555
2556     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2557                for node in self.my_node_info.values())
2558
2559     return env
2560
2561   def BuildHooksNodes(self):
2562     """Build hooks nodes.
2563
2564     """
2565     return ([], self.my_node_names)
2566
2567   def Exec(self, feedback_fn):
2568     """Verify integrity of the node group, performing various test on nodes.
2569
2570     """
2571     # This method has too many local variables. pylint: disable-msg=R0914
2572     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2573
2574     if not self.my_node_names:
2575       # empty node group
2576       feedback_fn("* Empty node group, skipping verification")
2577       return True
2578
2579     self.bad = False
2580     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2581     verbose = self.op.verbose
2582     self._feedback_fn = feedback_fn
2583
2584     vg_name = self.cfg.GetVGName()
2585     drbd_helper = self.cfg.GetDRBDHelper()
2586     cluster = self.cfg.GetClusterInfo()
2587     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2588     hypervisors = cluster.enabled_hypervisors
2589     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2590
2591     i_non_redundant = [] # Non redundant instances
2592     i_non_a_balanced = [] # Non auto-balanced instances
2593     n_offline = 0 # Count of offline nodes
2594     n_drained = 0 # Count of nodes being drained
2595     node_vol_should = {}
2596
2597     # FIXME: verify OS list
2598
2599     # File verification
2600     filemap = _ComputeAncillaryFiles(cluster, False)
2601
2602     # do local checksums
2603     master_node = self.master_node = self.cfg.GetMasterNode()
2604     master_ip = self.cfg.GetMasterIP()
2605
2606     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2607
2608     # We will make nodes contact all nodes in their group, and one node from
2609     # every other group.
2610     # TODO: should it be a *random* node, different every time?
2611     online_nodes = [node.name for node in node_data_list if not node.offline]
2612     other_group_nodes = {}
2613
2614     for name in sorted(self.all_node_info):
2615       node = self.all_node_info[name]
2616       if (node.group not in other_group_nodes
2617           and node.group != self.group_uuid
2618           and not node.offline):
2619         other_group_nodes[node.group] = node.name
2620
2621     node_verify_param = {
2622       constants.NV_FILELIST:
2623         utils.UniqueSequence(filename
2624                              for files in filemap
2625                              for filename in files),
2626       constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2627       constants.NV_HYPERVISOR: hypervisors,
2628       constants.NV_HVPARAMS:
2629         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2630       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2631                                  for node in node_data_list
2632                                  if not node.offline],
2633       constants.NV_INSTANCELIST: hypervisors,
2634       constants.NV_VERSION: None,
2635       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2636       constants.NV_NODESETUP: None,
2637       constants.NV_TIME: None,
2638       constants.NV_MASTERIP: (master_node, master_ip),
2639       constants.NV_OSLIST: None,
2640       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2641       }
2642
2643     if vg_name is not None:
2644       node_verify_param[constants.NV_VGLIST] = None
2645       node_verify_param[constants.NV_LVLIST] = vg_name
2646       node_verify_param[constants.NV_PVLIST] = [vg_name]
2647       node_verify_param[constants.NV_DRBDLIST] = None
2648
2649     if drbd_helper:
2650       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2651
2652     # bridge checks
2653     # FIXME: this needs to be changed per node-group, not cluster-wide
2654     bridges = set()
2655     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2656     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2657       bridges.add(default_nicpp[constants.NIC_LINK])
2658     for instance in self.my_inst_info.values():
2659       for nic in instance.nics:
2660         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2661         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2662           bridges.add(full_nic[constants.NIC_LINK])
2663
2664     if bridges:
2665       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2666
2667     # Build our expected cluster state
2668     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2669                                                  name=node.name,
2670                                                  vm_capable=node.vm_capable))
2671                       for node in node_data_list)
2672
2673     # Gather OOB paths
2674     oob_paths = []
2675     for node in self.all_node_info.values():
2676       path = _SupportsOob(self.cfg, node)
2677       if path and path not in oob_paths:
2678         oob_paths.append(path)
2679
2680     if oob_paths:
2681       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2682
2683     for instance in self.my_inst_names:
2684       inst_config = self.my_inst_info[instance]
2685
2686       for nname in inst_config.all_nodes:
2687         if nname not in node_image:
2688           gnode = self.NodeImage(name=nname)
2689           gnode.ghost = (nname not in self.all_node_info)
2690           node_image[nname] = gnode
2691
2692       inst_config.MapLVsByNode(node_vol_should)
2693
2694       pnode = inst_config.primary_node
2695       node_image[pnode].pinst.append(instance)
2696
2697       for snode in inst_config.secondary_nodes:
2698         nimg = node_image[snode]
2699         nimg.sinst.append(instance)
2700         if pnode not in nimg.sbp:
2701           nimg.sbp[pnode] = []
2702         nimg.sbp[pnode].append(instance)
2703
2704     # At this point, we have the in-memory data structures complete,
2705     # except for the runtime information, which we'll gather next
2706
2707     # Due to the way our RPC system works, exact response times cannot be
2708     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2709     # time before and after executing the request, we can at least have a time
2710     # window.
2711     nvinfo_starttime = time.time()
2712     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2713                                            node_verify_param,
2714                                            self.cfg.GetClusterName())
2715     nvinfo_endtime = time.time()
2716
2717     if self.extra_lv_nodes and vg_name is not None:
2718       extra_lv_nvinfo = \
2719           self.rpc.call_node_verify(self.extra_lv_nodes,
2720                                     {constants.NV_LVLIST: vg_name},
2721                                     self.cfg.GetClusterName())
2722     else:
2723       extra_lv_nvinfo = {}
2724
2725     all_drbd_map = self.cfg.ComputeDRBDMap()
2726
2727     feedback_fn("* Gathering disk information (%s nodes)" %
2728                 len(self.my_node_names))
2729     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2730                                      self.my_inst_info)
2731
2732     feedback_fn("* Verifying configuration file consistency")
2733
2734     # If not all nodes are being checked, we need to make sure the master node
2735     # and a non-checked vm_capable node are in the list.
2736     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2737     if absent_nodes:
2738       vf_nvinfo = all_nvinfo.copy()
2739       vf_node_info = list(self.my_node_info.values())
2740       additional_nodes = []
2741       if master_node not in self.my_node_info:
2742         additional_nodes.append(master_node)
2743         vf_node_info.append(self.all_node_info[master_node])
2744       # Add the first vm_capable node we find which is not included
2745       for node in absent_nodes:
2746         nodeinfo = self.all_node_info[node]
2747         if nodeinfo.vm_capable and not nodeinfo.offline:
2748           additional_nodes.append(node)
2749           vf_node_info.append(self.all_node_info[node])
2750           break
2751       key = constants.NV_FILELIST
2752       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2753                                                  {key: node_verify_param[key]},
2754                                                  self.cfg.GetClusterName()))
2755     else:
2756       vf_nvinfo = all_nvinfo
2757       vf_node_info = self.my_node_info.values()
2758
2759     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2760
2761     feedback_fn("* Verifying node status")
2762
2763     refos_img = None
2764
2765     for node_i in node_data_list:
2766       node = node_i.name
2767       nimg = node_image[node]
2768
2769       if node_i.offline:
2770         if verbose:
2771           feedback_fn("* Skipping offline node %s" % (node,))
2772         n_offline += 1
2773         continue
2774
2775       if node == master_node:
2776         ntype = "master"
2777       elif node_i.master_candidate:
2778         ntype = "master candidate"
2779       elif node_i.drained:
2780         ntype = "drained"
2781         n_drained += 1
2782       else:
2783         ntype = "regular"
2784       if verbose:
2785         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2786
2787       msg = all_nvinfo[node].fail_msg
2788       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2789       if msg:
2790         nimg.rpc_fail = True
2791         continue
2792
2793       nresult = all_nvinfo[node].payload
2794
2795       nimg.call_ok = self._VerifyNode(node_i, nresult)
2796       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2797       self._VerifyNodeNetwork(node_i, nresult)
2798       self._VerifyOob(node_i, nresult)
2799
2800       if nimg.vm_capable:
2801         self._VerifyNodeLVM(node_i, nresult, vg_name)
2802         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2803                              all_drbd_map)
2804
2805         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2806         self._UpdateNodeInstances(node_i, nresult, nimg)
2807         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2808         self._UpdateNodeOS(node_i, nresult, nimg)
2809
2810         if not nimg.os_fail:
2811           if refos_img is None:
2812             refos_img = nimg
2813           self._VerifyNodeOS(node_i, nimg, refos_img)
2814         self._VerifyNodeBridges(node_i, nresult, bridges)
2815
2816         # Check whether all running instancies are primary for the node. (This
2817         # can no longer be done from _VerifyInstance below, since some of the
2818         # wrong instances could be from other node groups.)
2819         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2820
2821         for inst in non_primary_inst:
2822           test = inst in self.all_inst_info
2823           _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2824                    "instance should not run on node %s", node_i.name)
2825           _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2826                    "node is running unknown instance %s", inst)
2827
2828     for node, result in extra_lv_nvinfo.items():
2829       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2830                               node_image[node], vg_name)
2831
2832     feedback_fn("* Verifying instance status")
2833     for instance in self.my_inst_names:
2834       if verbose:
2835         feedback_fn("* Verifying instance %s" % instance)
2836       inst_config = self.my_inst_info[instance]
2837       self._VerifyInstance(instance, inst_config, node_image,
2838                            instdisk[instance])
2839       inst_nodes_offline = []
2840
2841       pnode = inst_config.primary_node
2842       pnode_img = node_image[pnode]
2843       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2844                self.ENODERPC, pnode, "instance %s, connection to"
2845                " primary node failed", instance)
2846
2847       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2848                self.EINSTANCEBADNODE, instance,
2849                "instance is marked as running and lives on offline node %s",
2850                inst_config.primary_node)
2851
2852       # If the instance is non-redundant we cannot survive losing its primary
2853       # node, so we are not N+1 compliant. On the other hand we have no disk
2854       # templates with more than one secondary so that situation is not well
2855       # supported either.
2856       # FIXME: does not support file-backed instances
2857       if not inst_config.secondary_nodes:
2858         i_non_redundant.append(instance)
2859
2860       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2861                instance, "instance has multiple secondary nodes: %s",
2862                utils.CommaJoin(inst_config.secondary_nodes),
2863                code=self.ETYPE_WARNING)
2864
2865       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2866         pnode = inst_config.primary_node
2867         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2868         instance_groups = {}
2869
2870         for node in instance_nodes:
2871           instance_groups.setdefault(self.all_node_info[node].group,
2872                                      []).append(node)
2873
2874         pretty_list = [
2875           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2876           # Sort so that we always list the primary node first.
2877           for group, nodes in sorted(instance_groups.items(),
2878                                      key=lambda (_, nodes): pnode in nodes,
2879                                      reverse=True)]
2880
2881         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2882                       instance, "instance has primary and secondary nodes in"
2883                       " different groups: %s", utils.CommaJoin(pretty_list),
2884                       code=self.ETYPE_WARNING)
2885
2886       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2887         i_non_a_balanced.append(instance)
2888
2889       for snode in inst_config.secondary_nodes:
2890         s_img = node_image[snode]
2891         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2892                  "instance %s, connection to secondary node failed", instance)
2893
2894         if s_img.offline:
2895           inst_nodes_offline.append(snode)
2896
2897       # warn that the instance lives on offline nodes
2898       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2899                "instance has offline secondary node(s) %s",
2900                utils.CommaJoin(inst_nodes_offline))
2901       # ... or ghost/non-vm_capable nodes
2902       for node in inst_config.all_nodes:
2903         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2904                  "instance lives on ghost node %s", node)
2905         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2906                  instance, "instance lives on non-vm_capable node %s", node)
2907
2908     feedback_fn("* Verifying orphan volumes")
2909     reserved = utils.FieldSet(*cluster.reserved_lvs)
2910
2911     # We will get spurious "unknown volume" warnings if any node of this group
2912     # is secondary for an instance whose primary is in another group. To avoid
2913     # them, we find these instances and add their volumes to node_vol_should.
2914     for inst in self.all_inst_info.values():
2915       for secondary in inst.secondary_nodes:
2916         if (secondary in self.my_node_info
2917             and inst.name not in self.my_inst_info):
2918           inst.MapLVsByNode(node_vol_should)
2919           break
2920
2921     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2922
2923     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2924       feedback_fn("* Verifying N+1 Memory redundancy")
2925       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2926
2927     feedback_fn("* Other Notes")
2928     if i_non_redundant:
2929       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2930                   % len(i_non_redundant))
2931
2932     if i_non_a_balanced:
2933       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2934                   % len(i_non_a_balanced))
2935
2936     if n_offline:
2937       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2938
2939     if n_drained:
2940       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2941
2942     return not self.bad
2943
2944   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2945     """Analyze the post-hooks' result
2946
2947     This method analyses the hook result, handles it, and sends some
2948     nicely-formatted feedback back to the user.
2949
2950     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2951         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2952     @param hooks_results: the results of the multi-node hooks rpc call
2953     @param feedback_fn: function used send feedback back to the caller
2954     @param lu_result: previous Exec result
2955     @return: the new Exec result, based on the previous result
2956         and hook results
2957
2958     """
2959     # We only really run POST phase hooks, only for non-empty groups,
2960     # and are only interested in their results
2961     if not self.my_node_names:
2962       # empty node group
2963       pass
2964     elif phase == constants.HOOKS_PHASE_POST:
2965       # Used to change hooks' output to proper indentation
2966       feedback_fn("* Hooks Results")
2967       assert hooks_results, "invalid result from hooks"
2968
2969       for node_name in hooks_results:
2970         res = hooks_results[node_name]
2971         msg = res.fail_msg
2972         test = msg and not res.offline
2973         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2974                       "Communication failure in hooks execution: %s", msg)
2975         if res.offline or msg:
2976           # No need to investigate payload if node is offline or gave an error.
2977           # override manually lu_result here as _ErrorIf only
2978           # overrides self.bad
2979           lu_result = 1
2980           continue
2981         for script, hkr, output in res.payload:
2982           test = hkr == constants.HKR_FAIL
2983           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2984                         "Script %s failed, output:", script)
2985           if test:
2986             output = self._HOOKS_INDENT_RE.sub("      ", output)
2987             feedback_fn("%s" % output)
2988             lu_result = 0
2989
2990     return lu_result
2991
2992
2993 class LUClusterVerifyDisks(NoHooksLU):
2994   """Verifies the cluster disks status.
2995
2996   """
2997   REQ_BGL = False
2998
2999   def ExpandNames(self):
3000     self.share_locks = _ShareAll()
3001     self.needed_locks = {
3002       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3003       }
3004
3005   def Exec(self, feedback_fn):
3006     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3007
3008     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3009     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3010                            for group in group_names])
3011
3012
3013 class LUGroupVerifyDisks(NoHooksLU):
3014   """Verifies the status of all disks in a node group.
3015
3016   """
3017   REQ_BGL = False
3018
3019   def ExpandNames(self):
3020     # Raises errors.OpPrereqError on its own if group can't be found
3021     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3022
3023     self.share_locks = _ShareAll()
3024     self.needed_locks = {
3025       locking.LEVEL_INSTANCE: [],
3026       locking.LEVEL_NODEGROUP: [],
3027       locking.LEVEL_NODE: [],
3028       }
3029
3030   def DeclareLocks(self, level):
3031     if level == locking.LEVEL_INSTANCE:
3032       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3033
3034       # Lock instances optimistically, needs verification once node and group
3035       # locks have been acquired
3036       self.needed_locks[locking.LEVEL_INSTANCE] = \
3037         self.cfg.GetNodeGroupInstances(self.group_uuid)
3038
3039     elif level == locking.LEVEL_NODEGROUP:
3040       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3041
3042       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3043         set([self.group_uuid] +
3044             # Lock all groups used by instances optimistically; this requires
3045             # going via the node before it's locked, requiring verification
3046             # later on
3047             [group_uuid
3048              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3049              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3050
3051     elif level == locking.LEVEL_NODE:
3052       # This will only lock the nodes in the group to be verified which contain
3053       # actual instances
3054       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3055       self._LockInstancesNodes()
3056
3057       # Lock all nodes in group to be verified
3058       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3059       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3060       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3061
3062   def CheckPrereq(self):
3063     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3064     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3065     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3066
3067     assert self.group_uuid in owned_groups
3068
3069     # Check if locked instances are still correct
3070     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3071
3072     # Get instance information
3073     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3074
3075     # Check if node groups for locked instances are still correct
3076     for (instance_name, inst) in self.instances.items():
3077       assert owned_nodes.issuperset(inst.all_nodes), \
3078         "Instance %s's nodes changed while we kept the lock" % instance_name
3079
3080       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3081                                              owned_groups)
3082
3083       assert self.group_uuid in inst_groups, \
3084         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3085
3086   def Exec(self, feedback_fn):
3087     """Verify integrity of cluster disks.
3088
3089     @rtype: tuple of three items
3090     @return: a tuple of (dict of node-to-node_error, list of instances
3091         which need activate-disks, dict of instance: (node, volume) for
3092         missing volumes
3093
3094     """
3095     res_nodes = {}
3096     res_instances = set()
3097     res_missing = {}
3098
3099     nv_dict = _MapInstanceDisksToNodes([inst
3100                                         for inst in self.instances.values()
3101                                         if inst.admin_up])
3102
3103     if nv_dict:
3104       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3105                              set(self.cfg.GetVmCapableNodeList()))
3106
3107       node_lvs = self.rpc.call_lv_list(nodes, [])
3108
3109       for (node, node_res) in node_lvs.items():
3110         if node_res.offline:
3111           continue
3112
3113         msg = node_res.fail_msg
3114         if msg:
3115           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3116           res_nodes[node] = msg
3117           continue
3118
3119         for lv_name, (_, _, lv_online) in node_res.payload.items():
3120           inst = nv_dict.pop((node, lv_name), None)
3121           if not (lv_online or inst is None):
3122             res_instances.add(inst)
3123
3124       # any leftover items in nv_dict are missing LVs, let's arrange the data
3125       # better
3126       for key, inst in nv_dict.iteritems():
3127         res_missing.setdefault(inst, []).append(key)
3128
3129     return (res_nodes, list(res_instances), res_missing)
3130
3131
3132 class LUClusterRepairDiskSizes(NoHooksLU):
3133   """Verifies the cluster disks sizes.
3134
3135   """
3136   REQ_BGL = False
3137
3138   def ExpandNames(self):
3139     if self.op.instances:
3140       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3141       self.needed_locks = {
3142         locking.LEVEL_NODE: [],
3143         locking.LEVEL_INSTANCE: self.wanted_names,
3144         }
3145       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3146     else:
3147       self.wanted_names = None
3148       self.needed_locks = {
3149         locking.LEVEL_NODE: locking.ALL_SET,
3150         locking.LEVEL_INSTANCE: locking.ALL_SET,
3151         }
3152     self.share_locks = _ShareAll()
3153
3154   def DeclareLocks(self, level):
3155     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3156       self._LockInstancesNodes(primary_only=True)
3157
3158   def CheckPrereq(self):
3159     """Check prerequisites.
3160
3161     This only checks the optional instance list against the existing names.
3162
3163     """
3164     if self.wanted_names is None:
3165       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3166
3167     self.wanted_instances = \
3168         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3169
3170   def _EnsureChildSizes(self, disk):
3171     """Ensure children of the disk have the needed disk size.
3172
3173     This is valid mainly for DRBD8 and fixes an issue where the
3174     children have smaller disk size.
3175
3176     @param disk: an L{ganeti.objects.Disk} object
3177
3178     """
3179     if disk.dev_type == constants.LD_DRBD8:
3180       assert disk.children, "Empty children for DRBD8?"
3181       fchild = disk.children[0]
3182       mismatch = fchild.size < disk.size
3183       if mismatch:
3184         self.LogInfo("Child disk has size %d, parent %d, fixing",
3185                      fchild.size, disk.size)
3186         fchild.size = disk.size
3187
3188       # and we recurse on this child only, not on the metadev
3189       return self._EnsureChildSizes(fchild) or mismatch
3190     else:
3191       return False
3192
3193   def Exec(self, feedback_fn):
3194     """Verify the size of cluster disks.
3195
3196     """
3197     # TODO: check child disks too
3198     # TODO: check differences in size between primary/secondary nodes
3199     per_node_disks = {}
3200     for instance in self.wanted_instances:
3201       pnode = instance.primary_node
3202       if pnode not in per_node_disks:
3203         per_node_disks[pnode] = []
3204       for idx, disk in enumerate(instance.disks):
3205         per_node_disks[pnode].append((instance, idx, disk))
3206
3207     changed = []
3208     for node, dskl in per_node_disks.items():
3209       newl = [v[2].Copy() for v in dskl]
3210       for dsk in newl:
3211         self.cfg.SetDiskID(dsk, node)
3212       result = self.rpc.call_blockdev_getsize(node, newl)
3213       if result.fail_msg:
3214         self.LogWarning("Failure in blockdev_getsize call to node"
3215                         " %s, ignoring", node)
3216         continue
3217       if len(result.payload) != len(dskl):
3218         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3219                         " result.payload=%s", node, len(dskl), result.payload)
3220         self.LogWarning("Invalid result from node %s, ignoring node results",
3221                         node)
3222         continue
3223       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3224         if size is None:
3225           self.LogWarning("Disk %d of instance %s did not return size"
3226                           " information, ignoring", idx, instance.name)
3227           continue
3228         if not isinstance(size, (int, long)):
3229           self.LogWarning("Disk %d of instance %s did not return valid"
3230                           " size information, ignoring", idx, instance.name)
3231           continue
3232         size = size >> 20
3233         if size != disk.size:
3234           self.LogInfo("Disk %d of instance %s has mismatched size,"
3235                        " correcting: recorded %d, actual %d", idx,
3236                        instance.name, disk.size, size)
3237           disk.size = size
3238           self.cfg.Update(instance, feedback_fn)
3239           changed.append((instance.name, idx, size))
3240         if self._EnsureChildSizes(disk):
3241           self.cfg.Update(instance, feedback_fn)
3242           changed.append((instance.name, idx, disk.size))
3243     return changed
3244
3245
3246 class LUClusterRename(LogicalUnit):
3247   """Rename the cluster.
3248
3249   """
3250   HPATH = "cluster-rename"
3251   HTYPE = constants.HTYPE_CLUSTER
3252
3253   def BuildHooksEnv(self):
3254     """Build hooks env.
3255
3256     """
3257     return {
3258       "OP_TARGET": self.cfg.GetClusterName(),
3259       "NEW_NAME": self.op.name,
3260       }
3261
3262   def BuildHooksNodes(self):
3263     """Build hooks nodes.
3264
3265     """
3266     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3267
3268   def CheckPrereq(self):
3269     """Verify that the passed name is a valid one.
3270
3271     """
3272     hostname = netutils.GetHostname(name=self.op.name,
3273                                     family=self.cfg.GetPrimaryIPFamily())
3274
3275     new_name = hostname.name
3276     self.ip = new_ip = hostname.ip
3277     old_name = self.cfg.GetClusterName()
3278     old_ip = self.cfg.GetMasterIP()
3279     if new_name == old_name and new_ip == old_ip:
3280       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3281                                  " cluster has changed",
3282                                  errors.ECODE_INVAL)
3283     if new_ip != old_ip:
3284       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3285         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3286                                    " reachable on the network" %
3287                                    new_ip, errors.ECODE_NOTUNIQUE)
3288
3289     self.op.name = new_name
3290
3291   def Exec(self, feedback_fn):
3292     """Rename the cluster.
3293
3294     """
3295     clustername = self.op.name
3296     ip = self.ip
3297
3298     # shutdown the master IP
3299     master = self.cfg.GetMasterNode()
3300     result = self.rpc.call_node_stop_master(master, False)
3301     result.Raise("Could not disable the master role")
3302
3303     try:
3304       cluster = self.cfg.GetClusterInfo()
3305       cluster.cluster_name = clustername
3306       cluster.master_ip = ip
3307       self.cfg.Update(cluster, feedback_fn)
3308
3309       # update the known hosts file
3310       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3311       node_list = self.cfg.GetOnlineNodeList()
3312       try:
3313         node_list.remove(master)
3314       except ValueError:
3315         pass
3316       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3317     finally:
3318       result = self.rpc.call_node_start_master(master, False, False)
3319       msg = result.fail_msg
3320       if msg:
3321         self.LogWarning("Could not re-enable the master role on"
3322                         " the master, please restart manually: %s", msg)
3323
3324     return clustername
3325
3326
3327 class LUClusterSetParams(LogicalUnit):
3328   """Change the parameters of the cluster.
3329
3330   """
3331   HPATH = "cluster-modify"
3332   HTYPE = constants.HTYPE_CLUSTER
3333   REQ_BGL = False
3334
3335   def CheckArguments(self):
3336     """Check parameters
3337
3338     """
3339     if self.op.uid_pool:
3340       uidpool.CheckUidPool(self.op.uid_pool)
3341
3342     if self.op.add_uids:
3343       uidpool.CheckUidPool(self.op.add_uids)
3344
3345     if self.op.remove_uids:
3346       uidpool.CheckUidPool(self.op.remove_uids)
3347
3348   def ExpandNames(self):
3349     # FIXME: in the future maybe other cluster params won't require checking on
3350     # all nodes to be modified.
3351     self.needed_locks = {
3352       locking.LEVEL_NODE: locking.ALL_SET,
3353     }
3354     self.share_locks[locking.LEVEL_NODE] = 1
3355
3356   def BuildHooksEnv(self):
3357     """Build hooks env.
3358
3359     """
3360     return {
3361       "OP_TARGET": self.cfg.GetClusterName(),
3362       "NEW_VG_NAME": self.op.vg_name,
3363       }
3364
3365   def BuildHooksNodes(self):
3366     """Build hooks nodes.
3367
3368     """
3369     mn = self.cfg.GetMasterNode()
3370     return ([mn], [mn])
3371
3372   def CheckPrereq(self):
3373     """Check prerequisites.
3374
3375     This checks whether the given params don't conflict and
3376     if the given volume group is valid.
3377
3378     """
3379     if self.op.vg_name is not None and not self.op.vg_name:
3380       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3381         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3382                                    " instances exist", errors.ECODE_INVAL)
3383
3384     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3385       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3386         raise errors.OpPrereqError("Cannot disable drbd helper while"
3387                                    " drbd-based instances exist",
3388                                    errors.ECODE_INVAL)
3389
3390     node_list = self.owned_locks(locking.LEVEL_NODE)
3391
3392     # if vg_name not None, checks given volume group on all nodes
3393     if self.op.vg_name:
3394       vglist = self.rpc.call_vg_list(node_list)
3395       for node in node_list:
3396         msg = vglist[node].fail_msg
3397         if msg:
3398           # ignoring down node
3399           self.LogWarning("Error while gathering data on node %s"
3400                           " (ignoring node): %s", node, msg)
3401           continue
3402         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3403                                               self.op.vg_name,
3404                                               constants.MIN_VG_SIZE)
3405         if vgstatus:
3406           raise errors.OpPrereqError("Error on node '%s': %s" %
3407                                      (node, vgstatus), errors.ECODE_ENVIRON)
3408
3409     if self.op.drbd_helper:
3410       # checks given drbd helper on all nodes
3411       helpers = self.rpc.call_drbd_helper(node_list)
3412       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3413         if ninfo.offline:
3414           self.LogInfo("Not checking drbd helper on offline node %s", node)
3415           continue
3416         msg = helpers[node].fail_msg
3417         if msg:
3418           raise errors.OpPrereqError("Error checking drbd helper on node"
3419                                      " '%s': %s" % (node, msg),
3420                                      errors.ECODE_ENVIRON)
3421         node_helper = helpers[node].payload
3422         if node_helper != self.op.drbd_helper:
3423           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3424                                      (node, node_helper), errors.ECODE_ENVIRON)
3425
3426     self.cluster = cluster = self.cfg.GetClusterInfo()
3427     # validate params changes
3428     if self.op.beparams:
3429       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3430       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3431
3432     if self.op.ndparams:
3433       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3434       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3435
3436       # TODO: we need a more general way to handle resetting
3437       # cluster-level parameters to default values
3438       if self.new_ndparams["oob_program"] == "":
3439         self.new_ndparams["oob_program"] = \
3440             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3441
3442     if self.op.nicparams:
3443       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3444       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3445       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3446       nic_errors = []
3447
3448       # check all instances for consistency
3449       for instance in self.cfg.GetAllInstancesInfo().values():
3450         for nic_idx, nic in enumerate(instance.nics):
3451           params_copy = copy.deepcopy(nic.nicparams)
3452           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3453
3454           # check parameter syntax
3455           try:
3456             objects.NIC.CheckParameterSyntax(params_filled)
3457           except errors.ConfigurationError, err:
3458             nic_errors.append("Instance %s, nic/%d: %s" %
3459                               (instance.name, nic_idx, err))
3460
3461           # if we're moving instances to routed, check that they have an ip
3462           target_mode = params_filled[constants.NIC_MODE]
3463           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3464             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3465                               " address" % (instance.name, nic_idx))
3466       if nic_errors:
3467         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3468                                    "\n".join(nic_errors))
3469
3470     # hypervisor list/parameters
3471     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3472     if self.op.hvparams:
3473       for hv_name, hv_dict in self.op.hvparams.items():
3474         if hv_name not in self.new_hvparams:
3475           self.new_hvparams[hv_name] = hv_dict
3476         else:
3477           self.new_hvparams[hv_name].update(hv_dict)
3478
3479     # os hypervisor parameters
3480     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3481     if self.op.os_hvp:
3482       for os_name, hvs in self.op.os_hvp.items():
3483         if os_name not in self.new_os_hvp:
3484           self.new_os_hvp[os_name] = hvs
3485         else:
3486           for hv_name, hv_dict in hvs.items():
3487             if hv_name not in self.new_os_hvp[os_name]:
3488               self.new_os_hvp[os_name][hv_name] = hv_dict
3489             else:
3490               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3491
3492     # os parameters
3493     self.new_osp = objects.FillDict(cluster.osparams, {})
3494     if self.op.osparams:
3495       for os_name, osp in self.op.osparams.items():
3496         if os_name not in self.new_osp:
3497           self.new_osp[os_name] = {}
3498
3499         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3500                                                   use_none=True)
3501
3502         if not self.new_osp[os_name]:
3503           # we removed all parameters
3504           del self.new_osp[os_name]
3505         else:
3506           # check the parameter validity (remote check)
3507           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3508                          os_name, self.new_osp[os_name])
3509
3510     # changes to the hypervisor list
3511     if self.op.enabled_hypervisors is not None:
3512       self.hv_list = self.op.enabled_hypervisors
3513       for hv in self.hv_list:
3514         # if the hypervisor doesn't already exist in the cluster
3515         # hvparams, we initialize it to empty, and then (in both
3516         # cases) we make sure to fill the defaults, as we might not
3517         # have a complete defaults list if the hypervisor wasn't
3518         # enabled before
3519         if hv not in new_hvp:
3520           new_hvp[hv] = {}
3521         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3522         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3523     else:
3524       self.hv_list = cluster.enabled_hypervisors
3525
3526     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3527       # either the enabled list has changed, or the parameters have, validate
3528       for hv_name, hv_params in self.new_hvparams.items():
3529         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3530             (self.op.enabled_hypervisors and
3531              hv_name in self.op.enabled_hypervisors)):
3532           # either this is a new hypervisor, or its parameters have changed
3533           hv_class = hypervisor.GetHypervisor(hv_name)
3534           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3535           hv_class.CheckParameterSyntax(hv_params)
3536           _CheckHVParams(self, node_list, hv_name, hv_params)
3537
3538     if self.op.os_hvp:
3539       # no need to check any newly-enabled hypervisors, since the
3540       # defaults have already been checked in the above code-block
3541       for os_name, os_hvp in self.new_os_hvp.items():
3542         for hv_name, hv_params in os_hvp.items():
3543           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3544           # we need to fill in the new os_hvp on top of the actual hv_p
3545           cluster_defaults = self.new_hvparams.get(hv_name, {})
3546           new_osp = objects.FillDict(cluster_defaults, hv_params)
3547           hv_class = hypervisor.GetHypervisor(hv_name)
3548           hv_class.CheckParameterSyntax(new_osp)
3549           _CheckHVParams(self, node_list, hv_name, new_osp)
3550
3551     if self.op.default_iallocator:
3552       alloc_script = utils.FindFile(self.op.default_iallocator,
3553                                     constants.IALLOCATOR_SEARCH_PATH,
3554                                     os.path.isfile)
3555       if alloc_script is None:
3556         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3557                                    " specified" % self.op.default_iallocator,
3558                                    errors.ECODE_INVAL)
3559
3560   def Exec(self, feedback_fn):
3561     """Change the parameters of the cluster.
3562
3563     """
3564     if self.op.vg_name is not None:
3565       new_volume = self.op.vg_name
3566       if not new_volume:
3567         new_volume = None
3568       if new_volume != self.cfg.GetVGName():
3569         self.cfg.SetVGName(new_volume)
3570       else:
3571         feedback_fn("Cluster LVM configuration already in desired"
3572                     " state, not changing")
3573     if self.op.drbd_helper is not None:
3574       new_helper = self.op.drbd_helper
3575       if not new_helper:
3576         new_helper = None
3577       if new_helper != self.cfg.GetDRBDHelper():
3578         self.cfg.SetDRBDHelper(new_helper)
3579       else:
3580         feedback_fn("Cluster DRBD helper already in desired state,"
3581                     " not changing")
3582     if self.op.hvparams:
3583       self.cluster.hvparams = self.new_hvparams
3584     if self.op.os_hvp:
3585       self.cluster.os_hvp = self.new_os_hvp
3586     if self.op.enabled_hypervisors is not None:
3587       self.cluster.hvparams = self.new_hvparams
3588       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3589     if self.op.beparams:
3590       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3591     if self.op.nicparams:
3592       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3593     if self.op.osparams:
3594       self.cluster.osparams = self.new_osp
3595     if self.op.ndparams:
3596       self.cluster.ndparams = self.new_ndparams
3597
3598     if self.op.candidate_pool_size is not None:
3599       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3600       # we need to update the pool size here, otherwise the save will fail
3601       _AdjustCandidatePool(self, [])
3602
3603     if self.op.maintain_node_health is not None:
3604       self.cluster.maintain_node_health = self.op.maintain_node_health
3605
3606     if self.op.prealloc_wipe_disks is not None:
3607       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3608
3609     if self.op.add_uids is not None:
3610       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3611
3612     if self.op.remove_uids is not None:
3613       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3614
3615     if self.op.uid_pool is not None:
3616       self.cluster.uid_pool = self.op.uid_pool
3617
3618     if self.op.default_iallocator is not None:
3619       self.cluster.default_iallocator = self.op.default_iallocator
3620
3621     if self.op.reserved_lvs is not None:
3622       self.cluster.reserved_lvs = self.op.reserved_lvs
3623
3624     def helper_os(aname, mods, desc):
3625       desc += " OS list"
3626       lst = getattr(self.cluster, aname)
3627       for key, val in mods:
3628         if key == constants.DDM_ADD:
3629           if val in lst:
3630             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3631           else:
3632             lst.append(val)
3633         elif key == constants.DDM_REMOVE:
3634           if val in lst:
3635             lst.remove(val)
3636           else:
3637             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3638         else:
3639           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3640
3641     if self.op.hidden_os:
3642       helper_os("hidden_os", self.op.hidden_os, "hidden")
3643
3644     if self.op.blacklisted_os:
3645       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3646
3647     if self.op.master_netdev:
3648       master = self.cfg.GetMasterNode()
3649       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3650                   self.cluster.master_netdev)
3651       result = self.rpc.call_node_stop_master(master, False)
3652       result.Raise("Could not disable the master ip")
3653       feedback_fn("Changing master_netdev from %s to %s" %
3654                   (self.cluster.master_netdev, self.op.master_netdev))
3655       self.cluster.master_netdev = self.op.master_netdev
3656
3657     self.cfg.Update(self.cluster, feedback_fn)
3658
3659     if self.op.master_netdev:
3660       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3661                   self.op.master_netdev)
3662       result = self.rpc.call_node_start_master(master, False, False)
3663       if result.fail_msg:
3664         self.LogWarning("Could not re-enable the master ip on"
3665                         " the master, please restart manually: %s",
3666                         result.fail_msg)
3667
3668
3669 def _UploadHelper(lu, nodes, fname):
3670   """Helper for uploading a file and showing warnings.
3671
3672   """
3673   if os.path.exists(fname):
3674     result = lu.rpc.call_upload_file(nodes, fname)
3675     for to_node, to_result in result.items():
3676       msg = to_result.fail_msg
3677       if msg:
3678         msg = ("Copy of file %s to node %s failed: %s" %
3679                (fname, to_node, msg))
3680         lu.proc.LogWarning(msg)
3681
3682
3683 def _ComputeAncillaryFiles(cluster, redist):
3684   """Compute files external to Ganeti which need to be consistent.
3685
3686   @type redist: boolean
3687   @param redist: Whether to include files which need to be redistributed
3688
3689   """
3690   # Compute files for all nodes
3691   files_all = set([
3692     constants.SSH_KNOWN_HOSTS_FILE,
3693     constants.CONFD_HMAC_KEY,
3694     constants.CLUSTER_DOMAIN_SECRET_FILE,
3695     ])
3696
3697   if not redist:
3698     files_all.update(constants.ALL_CERT_FILES)
3699     files_all.update(ssconf.SimpleStore().GetFileList())
3700
3701   if cluster.modify_etc_hosts:
3702     files_all.add(constants.ETC_HOSTS)
3703
3704   # Files which must either exist on all nodes or on none
3705   files_all_opt = set([
3706     constants.RAPI_USERS_FILE,
3707     ])
3708
3709   # Files which should only be on master candidates
3710   files_mc = set()
3711   if not redist:
3712     files_mc.add(constants.CLUSTER_CONF_FILE)
3713
3714   # Files which should only be on VM-capable nodes
3715   files_vm = set(filename
3716     for hv_name in cluster.enabled_hypervisors
3717     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3718
3719   # Filenames must be unique
3720   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3721           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3722          "Found file listed in more than one file list"
3723
3724   return (files_all, files_all_opt, files_mc, files_vm)
3725
3726
3727 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3728   """Distribute additional files which are part of the cluster configuration.
3729
3730   ConfigWriter takes care of distributing the config and ssconf files, but
3731   there are more files which should be distributed to all nodes. This function
3732   makes sure those are copied.
3733
3734   @param lu: calling logical unit
3735   @param additional_nodes: list of nodes not in the config to distribute to
3736   @type additional_vm: boolean
3737   @param additional_vm: whether the additional nodes are vm-capable or not
3738
3739   """
3740   # Gather target nodes
3741   cluster = lu.cfg.GetClusterInfo()
3742   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3743
3744   online_nodes = lu.cfg.GetOnlineNodeList()
3745   vm_nodes = lu.cfg.GetVmCapableNodeList()
3746
3747   if additional_nodes is not None:
3748     online_nodes.extend(additional_nodes)
3749     if additional_vm:
3750       vm_nodes.extend(additional_nodes)
3751
3752   # Never distribute to master node
3753   for nodelist in [online_nodes, vm_nodes]:
3754     if master_info.name in nodelist:
3755       nodelist.remove(master_info.name)
3756
3757   # Gather file lists
3758   (files_all, files_all_opt, files_mc, files_vm) = \
3759     _ComputeAncillaryFiles(cluster, True)
3760
3761   # Never re-distribute configuration file from here
3762   assert not (constants.CLUSTER_CONF_FILE in files_all or
3763               constants.CLUSTER_CONF_FILE in files_vm)
3764   assert not files_mc, "Master candidates not handled in this function"
3765
3766   filemap = [
3767     (online_nodes, files_all),
3768     (online_nodes, files_all_opt),
3769     (vm_nodes, files_vm),
3770     ]
3771
3772   # Upload the files
3773   for (node_list, files) in filemap:
3774     for fname in files:
3775       _UploadHelper(lu, node_list, fname)
3776
3777
3778 class LUClusterRedistConf(NoHooksLU):
3779   """Force the redistribution of cluster configuration.
3780
3781   This is a very simple LU.
3782
3783   """
3784   REQ_BGL = False
3785
3786   def ExpandNames(self):
3787     self.needed_locks = {
3788       locking.LEVEL_NODE: locking.ALL_SET,
3789     }
3790     self.share_locks[locking.LEVEL_NODE] = 1
3791
3792   def Exec(self, feedback_fn):
3793     """Redistribute the configuration.
3794
3795     """
3796     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3797     _RedistributeAncillaryFiles(self)
3798
3799
3800 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3801   """Sleep and poll for an instance's disk to sync.
3802
3803   """
3804   if not instance.disks or disks is not None and not disks:
3805     return True
3806
3807   disks = _ExpandCheckDisks(instance, disks)
3808
3809   if not oneshot:
3810     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3811
3812   node = instance.primary_node
3813
3814   for dev in disks:
3815     lu.cfg.SetDiskID(dev, node)
3816
3817   # TODO: Convert to utils.Retry
3818
3819   retries = 0
3820   degr_retries = 10 # in seconds, as we sleep 1 second each time
3821   while True:
3822     max_time = 0
3823     done = True
3824     cumul_degraded = False
3825     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3826     msg = rstats.fail_msg
3827     if msg:
3828       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3829       retries += 1
3830       if retries >= 10:
3831         raise errors.RemoteError("Can't contact node %s for mirror data,"
3832                                  " aborting." % node)
3833       time.sleep(6)
3834       continue
3835     rstats = rstats.payload
3836     retries = 0
3837     for i, mstat in enumerate(rstats):
3838       if mstat is None:
3839         lu.LogWarning("Can't compute data for node %s/%s",
3840                            node, disks[i].iv_name)
3841         continue
3842
3843       cumul_degraded = (cumul_degraded or
3844                         (mstat.is_degraded and mstat.sync_percent is None))
3845       if mstat.sync_percent is not None:
3846         done = False
3847         if mstat.estimated_time is not None:
3848           rem_time = ("%s remaining (estimated)" %
3849                       utils.FormatSeconds(mstat.estimated_time))
3850           max_time = mstat.estimated_time
3851         else:
3852           rem_time = "no time estimate"
3853         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3854                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3855
3856     # if we're done but degraded, let's do a few small retries, to
3857     # make sure we see a stable and not transient situation; therefore
3858     # we force restart of the loop
3859     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3860       logging.info("Degraded disks found, %d retries left", degr_retries)
3861       degr_retries -= 1
3862       time.sleep(1)
3863       continue
3864
3865     if done or oneshot:
3866       break
3867
3868     time.sleep(min(60, max_time))
3869
3870   if done:
3871     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3872   return not cumul_degraded
3873
3874
3875 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3876   """Check that mirrors are not degraded.
3877
3878   The ldisk parameter, if True, will change the test from the
3879   is_degraded attribute (which represents overall non-ok status for
3880   the device(s)) to the ldisk (representing the local storage status).
3881
3882   """
3883   lu.cfg.SetDiskID(dev, node)
3884
3885   result = True
3886
3887   if on_primary or dev.AssembleOnSecondary():
3888     rstats = lu.rpc.call_blockdev_find(node, dev)
3889     msg = rstats.fail_msg
3890     if msg:
3891       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3892       result = False
3893     elif not rstats.payload:
3894       lu.LogWarning("Can't find disk on node %s", node)
3895       result = False
3896     else:
3897       if ldisk:
3898         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3899       else:
3900         result = result and not rstats.payload.is_degraded
3901
3902   if dev.children:
3903     for child in dev.children:
3904       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3905
3906   return result
3907
3908
3909 class LUOobCommand(NoHooksLU):
3910   """Logical unit for OOB handling.
3911
3912   """
3913   REG_BGL = False
3914   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3915
3916   def ExpandNames(self):
3917     """Gather locks we need.
3918
3919     """
3920     if self.op.node_names:
3921       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3922       lock_names = self.op.node_names
3923     else:
3924       lock_names = locking.ALL_SET
3925
3926     self.needed_locks = {
3927       locking.LEVEL_NODE: lock_names,
3928       }
3929
3930   def CheckPrereq(self):
3931     """Check prerequisites.
3932
3933     This checks:
3934      - the node exists in the configuration
3935      - OOB is supported
3936
3937     Any errors are signaled by raising errors.OpPrereqError.
3938
3939     """
3940     self.nodes = []
3941     self.master_node = self.cfg.GetMasterNode()
3942
3943     assert self.op.power_delay >= 0.0
3944
3945     if self.op.node_names:
3946       if (self.op.command in self._SKIP_MASTER and
3947           self.master_node in self.op.node_names):
3948         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3949         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3950
3951         if master_oob_handler:
3952           additional_text = ("run '%s %s %s' if you want to operate on the"
3953                              " master regardless") % (master_oob_handler,
3954                                                       self.op.command,
3955                                                       self.master_node)
3956         else:
3957           additional_text = "it does not support out-of-band operations"
3958
3959         raise errors.OpPrereqError(("Operating on the master node %s is not"
3960                                     " allowed for %s; %s") %
3961                                    (self.master_node, self.op.command,
3962                                     additional_text), errors.ECODE_INVAL)
3963     else:
3964       self.op.node_names = self.cfg.GetNodeList()
3965       if self.op.command in self._SKIP_MASTER:
3966         self.op.node_names.remove(self.master_node)
3967
3968     if self.op.command in self._SKIP_MASTER:
3969       assert self.master_node not in self.op.node_names
3970
3971     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3972       if node is None:
3973         raise errors.OpPrereqError("Node %s not found" % node_name,
3974                                    errors.ECODE_NOENT)
3975       else:
3976         self.nodes.append(node)
3977
3978       if (not self.op.ignore_status and
3979           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3980         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3981                                     " not marked offline") % node_name,
3982                                    errors.ECODE_STATE)
3983
3984   def Exec(self, feedback_fn):
3985     """Execute OOB and return result if we expect any.
3986
3987     """
3988     master_node = self.master_node
3989     ret = []
3990
3991     for idx, node in enumerate(utils.NiceSort(self.nodes,
3992                                               key=lambda node: node.name)):
3993       node_entry = [(constants.RS_NORMAL, node.name)]
3994       ret.append(node_entry)
3995
3996       oob_program = _SupportsOob(self.cfg, node)
3997
3998       if not oob_program:
3999         node_entry.append((constants.RS_UNAVAIL, None))
4000         continue
4001
4002       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4003                    self.op.command, oob_program, node.name)
4004       result = self.rpc.call_run_oob(master_node, oob_program,
4005                                      self.op.command, node.name,
4006                                      self.op.timeout)
4007
4008       if result.fail_msg:
4009         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4010                         node.name, result.fail_msg)
4011         node_entry.append((constants.RS_NODATA, None))
4012       else:
4013         try:
4014           self._CheckPayload(result)
4015         except errors.OpExecError, err:
4016           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4017                           node.name, err)
4018           node_entry.append((constants.RS_NODATA, None))
4019         else:
4020           if self.op.command == constants.OOB_HEALTH:
4021             # For health we should log important events
4022             for item, status in result.payload:
4023               if status in [constants.OOB_STATUS_WARNING,
4024                             constants.OOB_STATUS_CRITICAL]:
4025                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4026                                 item, node.name, status)
4027
4028           if self.op.command == constants.OOB_POWER_ON:
4029             node.powered = True
4030           elif self.op.command == constants.OOB_POWER_OFF:
4031             node.powered = False
4032           elif self.op.command == constants.OOB_POWER_STATUS:
4033             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4034             if powered != node.powered:
4035               logging.warning(("Recorded power state (%s) of node '%s' does not"
4036                                " match actual power state (%s)"), node.powered,
4037                               node.name, powered)
4038
4039           # For configuration changing commands we should update the node
4040           if self.op.command in (constants.OOB_POWER_ON,
4041                                  constants.OOB_POWER_OFF):
4042             self.cfg.Update(node, feedback_fn)
4043
4044           node_entry.append((constants.RS_NORMAL, result.payload))
4045
4046           if (self.op.command == constants.OOB_POWER_ON and
4047               idx < len(self.nodes) - 1):
4048             time.sleep(self.op.power_delay)
4049
4050     return ret
4051
4052   def _CheckPayload(self, result):
4053     """Checks if the payload is valid.
4054
4055     @param result: RPC result
4056     @raises errors.OpExecError: If payload is not valid
4057
4058     """
4059     errs = []
4060     if self.op.command == constants.OOB_HEALTH:
4061       if not isinstance(result.payload, list):
4062         errs.append("command 'health' is expected to return a list but got %s" %
4063                     type(result.payload))
4064       else:
4065         for item, status in result.payload:
4066           if status not in constants.OOB_STATUSES:
4067             errs.append("health item '%s' has invalid status '%s'" %
4068                         (item, status))
4069
4070     if self.op.command == constants.OOB_POWER_STATUS:
4071       if not isinstance(result.payload, dict):
4072         errs.append("power-status is expected to return a dict but got %s" %
4073                     type(result.payload))
4074
4075     if self.op.command in [
4076         constants.OOB_POWER_ON,
4077         constants.OOB_POWER_OFF,
4078         constants.OOB_POWER_CYCLE,
4079         ]:
4080       if result.payload is not None:
4081         errs.append("%s is expected to not return payload but got '%s'" %
4082                     (self.op.command, result.payload))
4083
4084     if errs:
4085       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4086                                utils.CommaJoin(errs))
4087
4088 class _OsQuery(_QueryBase):
4089   FIELDS = query.OS_FIELDS
4090
4091   def ExpandNames(self, lu):
4092     # Lock all nodes in shared mode
4093     # Temporary removal of locks, should be reverted later
4094     # TODO: reintroduce locks when they are lighter-weight
4095     lu.needed_locks = {}
4096     #self.share_locks[locking.LEVEL_NODE] = 1
4097     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4098
4099     # The following variables interact with _QueryBase._GetNames
4100     if self.names:
4101       self.wanted = self.names
4102     else:
4103       self.wanted = locking.ALL_SET
4104
4105     self.do_locking = self.use_locking
4106
4107   def DeclareLocks(self, lu, level):
4108     pass
4109
4110   @staticmethod
4111   def _DiagnoseByOS(rlist):
4112     """Remaps a per-node return list into an a per-os per-node dictionary
4113
4114     @param rlist: a map with node names as keys and OS objects as values
4115
4116     @rtype: dict
4117     @return: a dictionary with osnames as keys and as value another
4118         map, with nodes as keys and tuples of (path, status, diagnose,
4119         variants, parameters, api_versions) as values, eg::
4120
4121           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4122                                      (/srv/..., False, "invalid api")],
4123                            "node2": [(/srv/..., True, "", [], [])]}
4124           }
4125
4126     """
4127     all_os = {}
4128     # we build here the list of nodes that didn't fail the RPC (at RPC
4129     # level), so that nodes with a non-responding node daemon don't
4130     # make all OSes invalid
4131     good_nodes = [node_name for node_name in rlist
4132                   if not rlist[node_name].fail_msg]
4133     for node_name, nr in rlist.items():
4134       if nr.fail_msg or not nr.payload:
4135         continue
4136       for (name, path, status, diagnose, variants,
4137            params, api_versions) in nr.payload:
4138         if name not in all_os:
4139           # build a list of nodes for this os containing empty lists
4140           # for each node in node_list
4141           all_os[name] = {}
4142           for nname in good_nodes:
4143             all_os[name][nname] = []
4144         # convert params from [name, help] to (name, help)
4145         params = [tuple(v) for v in params]
4146         all_os[name][node_name].append((path, status, diagnose,
4147                                         variants, params, api_versions))
4148     return all_os
4149
4150   def _GetQueryData(self, lu):
4151     """Computes the list of nodes and their attributes.
4152
4153     """
4154     # Locking is not used
4155     assert not (compat.any(lu.glm.is_owned(level)
4156                            for level in locking.LEVELS
4157                            if level != locking.LEVEL_CLUSTER) or
4158                 self.do_locking or self.use_locking)
4159
4160     valid_nodes = [node.name
4161                    for node in lu.cfg.GetAllNodesInfo().values()
4162                    if not node.offline and node.vm_capable]
4163     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4164     cluster = lu.cfg.GetClusterInfo()
4165
4166     data = {}
4167
4168     for (os_name, os_data) in pol.items():
4169       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4170                           hidden=(os_name in cluster.hidden_os),
4171                           blacklisted=(os_name in cluster.blacklisted_os))
4172
4173       variants = set()
4174       parameters = set()
4175       api_versions = set()
4176
4177       for idx, osl in enumerate(os_data.values()):
4178         info.valid = bool(info.valid and osl and osl[0][1])
4179         if not info.valid:
4180           break
4181
4182         (node_variants, node_params, node_api) = osl[0][3:6]
4183         if idx == 0:
4184           # First entry
4185           variants.update(node_variants)
4186           parameters.update(node_params)
4187           api_versions.update(node_api)
4188         else:
4189           # Filter out inconsistent values
4190           variants.intersection_update(node_variants)
4191           parameters.intersection_update(node_params)
4192           api_versions.intersection_update(node_api)
4193
4194       info.variants = list(variants)
4195       info.parameters = list(parameters)
4196       info.api_versions = list(api_versions)
4197
4198       data[os_name] = info
4199
4200     # Prepare data in requested order
4201     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4202             if name in data]
4203
4204
4205 class LUOsDiagnose(NoHooksLU):
4206   """Logical unit for OS diagnose/query.
4207
4208   """
4209   REQ_BGL = False
4210
4211   @staticmethod
4212   def _BuildFilter(fields, names):
4213     """Builds a filter for querying OSes.
4214
4215     """
4216     name_filter = qlang.MakeSimpleFilter("name", names)
4217
4218     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4219     # respective field is not requested
4220     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4221                      for fname in ["hidden", "blacklisted"]
4222                      if fname not in fields]
4223     if "valid" not in fields:
4224       status_filter.append([qlang.OP_TRUE, "valid"])
4225
4226     if status_filter:
4227       status_filter.insert(0, qlang.OP_AND)
4228     else:
4229       status_filter = None
4230
4231     if name_filter and status_filter:
4232       return [qlang.OP_AND, name_filter, status_filter]
4233     elif name_filter:
4234       return name_filter
4235     else:
4236       return status_filter
4237
4238   def CheckArguments(self):
4239     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4240                        self.op.output_fields, False)
4241
4242   def ExpandNames(self):
4243     self.oq.ExpandNames(self)
4244
4245   def Exec(self, feedback_fn):
4246     return self.oq.OldStyleQuery(self)
4247
4248
4249 class LUNodeRemove(LogicalUnit):
4250   """Logical unit for removing a node.
4251
4252   """
4253   HPATH = "node-remove"
4254   HTYPE = constants.HTYPE_NODE
4255
4256   def BuildHooksEnv(self):
4257     """Build hooks env.
4258
4259     This doesn't run on the target node in the pre phase as a failed
4260     node would then be impossible to remove.
4261
4262     """
4263     return {
4264       "OP_TARGET": self.op.node_name,
4265       "NODE_NAME": self.op.node_name,
4266       }
4267
4268   def BuildHooksNodes(self):
4269     """Build hooks nodes.
4270
4271     """
4272     all_nodes = self.cfg.GetNodeList()
4273     try:
4274       all_nodes.remove(self.op.node_name)
4275     except ValueError:
4276       logging.warning("Node '%s', which is about to be removed, was not found"
4277                       " in the list of all nodes", self.op.node_name)
4278     return (all_nodes, all_nodes)
4279
4280   def CheckPrereq(self):
4281     """Check prerequisites.
4282
4283     This checks:
4284      - the node exists in the configuration
4285      - it does not have primary or secondary instances
4286      - it's not the master
4287
4288     Any errors are signaled by raising errors.OpPrereqError.
4289
4290     """
4291     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4292     node = self.cfg.GetNodeInfo(self.op.node_name)
4293     assert node is not None
4294
4295     masternode = self.cfg.GetMasterNode()
4296     if node.name == masternode:
4297       raise errors.OpPrereqError("Node is the master node, failover to another"
4298                                  " node is required", errors.ECODE_INVAL)
4299
4300     for instance_name, instance in self.cfg.GetAllInstancesInfo():
4301       if node.name in instance.all_nodes:
4302         raise errors.OpPrereqError("Instance %s is still running on the node,"
4303                                    " please remove first" % instance_name,
4304                                    errors.ECODE_INVAL)
4305     self.op.node_name = node.name
4306     self.node = node
4307
4308   def Exec(self, feedback_fn):
4309     """Removes the node from the cluster.
4310
4311     """
4312     node = self.node
4313     logging.info("Stopping the node daemon and removing configs from node %s",
4314                  node.name)
4315
4316     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4317
4318     # Promote nodes to master candidate as needed
4319     _AdjustCandidatePool(self, exceptions=[node.name])
4320     self.context.RemoveNode(node.name)
4321
4322     # Run post hooks on the node before it's removed
4323     _RunPostHook(self, node.name)
4324
4325     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4326     msg = result.fail_msg
4327     if msg:
4328       self.LogWarning("Errors encountered on the remote node while leaving"
4329                       " the cluster: %s", msg)
4330
4331     # Remove node from our /etc/hosts
4332     if self.cfg.GetClusterInfo().modify_etc_hosts:
4333       master_node = self.cfg.GetMasterNode()
4334       result = self.rpc.call_etc_hosts_modify(master_node,
4335                                               constants.ETC_HOSTS_REMOVE,
4336                                               node.name, None)
4337       result.Raise("Can't update hosts file with new host data")
4338       _RedistributeAncillaryFiles(self)
4339
4340
4341 class _NodeQuery(_QueryBase):
4342   FIELDS = query.NODE_FIELDS
4343
4344   def ExpandNames(self, lu):
4345     lu.needed_locks = {}
4346     lu.share_locks = _ShareAll()
4347
4348     if self.names:
4349       self.wanted = _GetWantedNodes(lu, self.names)
4350     else:
4351       self.wanted = locking.ALL_SET
4352
4353     self.do_locking = (self.use_locking and
4354                        query.NQ_LIVE in self.requested_data)
4355
4356     if self.do_locking:
4357       # If any non-static field is requested we need to lock the nodes
4358       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4359
4360   def DeclareLocks(self, lu, level):
4361     pass
4362
4363   def _GetQueryData(self, lu):
4364     """Computes the list of nodes and their attributes.
4365
4366     """
4367     all_info = lu.cfg.GetAllNodesInfo()
4368
4369     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4370
4371     # Gather data as requested
4372     if query.NQ_LIVE in self.requested_data:
4373       # filter out non-vm_capable nodes
4374       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4375
4376       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4377                                         lu.cfg.GetHypervisorType())
4378       live_data = dict((name, nresult.payload)
4379                        for (name, nresult) in node_data.items()
4380                        if not nresult.fail_msg and nresult.payload)
4381     else:
4382       live_data = None
4383
4384     if query.NQ_INST in self.requested_data:
4385       node_to_primary = dict([(name, set()) for name in nodenames])
4386       node_to_secondary = dict([(name, set()) for name in nodenames])
4387
4388       inst_data = lu.cfg.GetAllInstancesInfo()
4389
4390       for inst in inst_data.values():
4391         if inst.primary_node in node_to_primary:
4392           node_to_primary[inst.primary_node].add(inst.name)
4393         for secnode in inst.secondary_nodes:
4394           if secnode in node_to_secondary:
4395             node_to_secondary[secnode].add(inst.name)
4396     else:
4397       node_to_primary = None
4398       node_to_secondary = None
4399
4400     if query.NQ_OOB in self.requested_data:
4401       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4402                          for name, node in all_info.iteritems())
4403     else:
4404       oob_support = None
4405
4406     if query.NQ_GROUP in self.requested_data:
4407       groups = lu.cfg.GetAllNodeGroupsInfo()
4408     else:
4409       groups = {}
4410
4411     return query.NodeQueryData([all_info[name] for name in nodenames],
4412                                live_data, lu.cfg.GetMasterNode(),
4413                                node_to_primary, node_to_secondary, groups,
4414                                oob_support, lu.cfg.GetClusterInfo())
4415
4416
4417 class LUNodeQuery(NoHooksLU):
4418   """Logical unit for querying nodes.
4419
4420   """
4421   # pylint: disable-msg=W0142
4422   REQ_BGL = False
4423
4424   def CheckArguments(self):
4425     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4426                          self.op.output_fields, self.op.use_locking)
4427
4428   def ExpandNames(self):
4429     self.nq.ExpandNames(self)
4430
4431   def Exec(self, feedback_fn):
4432     return self.nq.OldStyleQuery(self)
4433
4434
4435 class LUNodeQueryvols(NoHooksLU):
4436   """Logical unit for getting volumes on node(s).
4437
4438   """
4439   REQ_BGL = False
4440   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4441   _FIELDS_STATIC = utils.FieldSet("node")
4442
4443   def CheckArguments(self):
4444     _CheckOutputFields(static=self._FIELDS_STATIC,
4445                        dynamic=self._FIELDS_DYNAMIC,
4446                        selected=self.op.output_fields)
4447
4448   def ExpandNames(self):
4449     self.needed_locks = {}
4450     self.share_locks[locking.LEVEL_NODE] = 1
4451     if not self.op.nodes:
4452       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4453     else:
4454       self.needed_locks[locking.LEVEL_NODE] = \
4455         _GetWantedNodes(self, self.op.nodes)
4456
4457   def Exec(self, feedback_fn):
4458     """Computes the list of nodes and their attributes.
4459
4460     """
4461     nodenames = self.owned_locks(locking.LEVEL_NODE)
4462     volumes = self.rpc.call_node_volumes(nodenames)
4463
4464     ilist = self.cfg.GetAllInstancesInfo()
4465     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4466
4467     output = []
4468     for node in nodenames:
4469       nresult = volumes[node]
4470       if nresult.offline:
4471         continue
4472       msg = nresult.fail_msg
4473       if msg:
4474         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4475         continue
4476
4477       node_vols = sorted(nresult.payload,
4478                          key=operator.itemgetter("dev"))
4479
4480       for vol in node_vols:
4481         node_output = []
4482         for field in self.op.output_fields:
4483           if field == "node":
4484             val = node
4485           elif field == "phys":
4486             val = vol["dev"]
4487           elif field == "vg":
4488             val = vol["vg"]
4489           elif field == "name":
4490             val = vol["name"]
4491           elif field == "size":
4492             val = int(float(vol["size"]))
4493           elif field == "instance":
4494             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4495           else:
4496             raise errors.ParameterError(field)
4497           node_output.append(str(val))
4498
4499         output.append(node_output)
4500
4501     return output
4502
4503
4504 class LUNodeQueryStorage(NoHooksLU):
4505   """Logical unit for getting information on storage units on node(s).
4506
4507   """
4508   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4509   REQ_BGL = False
4510
4511   def CheckArguments(self):
4512     _CheckOutputFields(static=self._FIELDS_STATIC,
4513                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4514                        selected=self.op.output_fields)
4515
4516   def ExpandNames(self):
4517     self.needed_locks = {}
4518     self.share_locks[locking.LEVEL_NODE] = 1
4519
4520     if self.op.nodes:
4521       self.needed_locks[locking.LEVEL_NODE] = \
4522         _GetWantedNodes(self, self.op.nodes)
4523     else:
4524       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4525
4526   def Exec(self, feedback_fn):
4527     """Computes the list of nodes and their attributes.
4528
4529     """
4530     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4531
4532     # Always get name to sort by
4533     if constants.SF_NAME in self.op.output_fields:
4534       fields = self.op.output_fields[:]
4535     else:
4536       fields = [constants.SF_NAME] + self.op.output_fields
4537
4538     # Never ask for node or type as it's only known to the LU
4539     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4540       while extra in fields:
4541         fields.remove(extra)
4542
4543     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4544     name_idx = field_idx[constants.SF_NAME]
4545
4546     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4547     data = self.rpc.call_storage_list(self.nodes,
4548                                       self.op.storage_type, st_args,
4549                                       self.op.name, fields)
4550
4551     result = []
4552
4553     for node in utils.NiceSort(self.nodes):
4554       nresult = data[node]
4555       if nresult.offline:
4556         continue
4557
4558       msg = nresult.fail_msg
4559       if msg:
4560         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4561         continue
4562
4563       rows = dict([(row[name_idx], row) for row in nresult.payload])
4564
4565       for name in utils.NiceSort(rows.keys()):
4566         row = rows[name]
4567
4568         out = []
4569
4570         for field in self.op.output_fields:
4571           if field == constants.SF_NODE:
4572             val = node
4573           elif field == constants.SF_TYPE:
4574             val = self.op.storage_type
4575           elif field in field_idx:
4576             val = row[field_idx[field]]
4577           else:
4578             raise errors.ParameterError(field)
4579
4580           out.append(val)
4581
4582         result.append(out)
4583
4584     return result
4585
4586
4587 class _InstanceQuery(_QueryBase):
4588   FIELDS = query.INSTANCE_FIELDS
4589
4590   def ExpandNames(self, lu):
4591     lu.needed_locks = {}
4592     lu.share_locks = _ShareAll()
4593
4594     if self.names:
4595       self.wanted = _GetWantedInstances(lu, self.names)
4596     else:
4597       self.wanted = locking.ALL_SET
4598
4599     self.do_locking = (self.use_locking and
4600                        query.IQ_LIVE in self.requested_data)
4601     if self.do_locking:
4602       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4603       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4604       lu.needed_locks[locking.LEVEL_NODE] = []
4605       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4606
4607     self.do_grouplocks = (self.do_locking and
4608                           query.IQ_NODES in self.requested_data)
4609
4610   def DeclareLocks(self, lu, level):
4611     if self.do_locking:
4612       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4613         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4614
4615         # Lock all groups used by instances optimistically; this requires going
4616         # via the node before it's locked, requiring verification later on
4617         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4618           set(group_uuid
4619               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4620               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4621       elif level == locking.LEVEL_NODE:
4622         lu._LockInstancesNodes() # pylint: disable-msg=W0212
4623
4624   @staticmethod
4625   def _CheckGroupLocks(lu):
4626     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4627     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4628
4629     # Check if node groups for locked instances are still correct
4630     for instance_name in owned_instances:
4631       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4632
4633   def _GetQueryData(self, lu):
4634     """Computes the list of instances and their attributes.
4635
4636     """
4637     if self.do_grouplocks:
4638       self._CheckGroupLocks(lu)
4639
4640     cluster = lu.cfg.GetClusterInfo()
4641     all_info = lu.cfg.GetAllInstancesInfo()
4642
4643     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4644
4645     instance_list = [all_info[name] for name in instance_names]
4646     nodes = frozenset(itertools.chain(*(inst.all_nodes
4647                                         for inst in instance_list)))
4648     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4649     bad_nodes = []
4650     offline_nodes = []
4651     wrongnode_inst = set()
4652
4653     # Gather data as requested
4654     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4655       live_data = {}
4656       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4657       for name in nodes:
4658         result = node_data[name]
4659         if result.offline:
4660           # offline nodes will be in both lists
4661           assert result.fail_msg
4662           offline_nodes.append(name)
4663         if result.fail_msg:
4664           bad_nodes.append(name)
4665         elif result.payload:
4666           for inst in result.payload:
4667             if inst in all_info:
4668               if all_info[inst].primary_node == name:
4669                 live_data.update(result.payload)
4670               else:
4671                 wrongnode_inst.add(inst)
4672             else:
4673               # orphan instance; we don't list it here as we don't
4674               # handle this case yet in the output of instance listing
4675               logging.warning("Orphan instance '%s' found on node %s",
4676                               inst, name)
4677         # else no instance is alive
4678     else:
4679       live_data = {}
4680
4681     if query.IQ_DISKUSAGE in self.requested_data:
4682       disk_usage = dict((inst.name,
4683                          _ComputeDiskSize(inst.disk_template,
4684                                           [{constants.IDISK_SIZE: disk.size}
4685                                            for disk in inst.disks]))
4686                         for inst in instance_list)
4687     else:
4688       disk_usage = None
4689
4690     if query.IQ_CONSOLE in self.requested_data:
4691       consinfo = {}
4692       for inst in instance_list:
4693         if inst.name in live_data:
4694           # Instance is running
4695           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4696         else:
4697           consinfo[inst.name] = None
4698       assert set(consinfo.keys()) == set(instance_names)
4699     else:
4700       consinfo = None
4701
4702     if query.IQ_NODES in self.requested_data:
4703       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4704                                             instance_list)))
4705       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4706       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4707                     for uuid in set(map(operator.attrgetter("group"),
4708                                         nodes.values())))
4709     else:
4710       nodes = None
4711       groups = None
4712
4713     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4714                                    disk_usage, offline_nodes, bad_nodes,
4715                                    live_data, wrongnode_inst, consinfo,
4716                                    nodes, groups)
4717
4718
4719 class LUQuery(NoHooksLU):
4720   """Query for resources/items of a certain kind.
4721
4722   """
4723   # pylint: disable-msg=W0142
4724   REQ_BGL = False
4725
4726   def CheckArguments(self):
4727     qcls = _GetQueryImplementation(self.op.what)
4728
4729     self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4730
4731   def ExpandNames(self):
4732     self.impl.ExpandNames(self)
4733
4734   def DeclareLocks(self, level):
4735     self.impl.DeclareLocks(self, level)
4736
4737   def Exec(self, feedback_fn):
4738     return self.impl.NewStyleQuery(self)
4739
4740
4741 class LUQueryFields(NoHooksLU):
4742   """Query for resources/items of a certain kind.
4743
4744   """
4745   # pylint: disable-msg=W0142
4746   REQ_BGL = False
4747
4748   def CheckArguments(self):
4749     self.qcls = _GetQueryImplementation(self.op.what)
4750
4751   def ExpandNames(self):
4752     self.needed_locks = {}
4753
4754   def Exec(self, feedback_fn):
4755     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4756
4757
4758 class LUNodeModifyStorage(NoHooksLU):
4759   """Logical unit for modifying a storage volume on a node.
4760
4761   """
4762   REQ_BGL = False
4763
4764   def CheckArguments(self):
4765     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4766
4767     storage_type = self.op.storage_type
4768
4769     try:
4770       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4771     except KeyError:
4772       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4773                                  " modified" % storage_type,
4774                                  errors.ECODE_INVAL)
4775
4776     diff = set(self.op.changes.keys()) - modifiable
4777     if diff:
4778       raise errors.OpPrereqError("The following fields can not be modified for"
4779                                  " storage units of type '%s': %r" %
4780                                  (storage_type, list(diff)),
4781                                  errors.ECODE_INVAL)
4782
4783   def ExpandNames(self):
4784     self.needed_locks = {
4785       locking.LEVEL_NODE: self.op.node_name,
4786       }
4787
4788   def Exec(self, feedback_fn):
4789     """Computes the list of nodes and their attributes.
4790
4791     """
4792     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4793     result = self.rpc.call_storage_modify(self.op.node_name,
4794                                           self.op.storage_type, st_args,
4795                                           self.op.name, self.op.changes)
4796     result.Raise("Failed to modify storage unit '%s' on %s" %
4797                  (self.op.name, self.op.node_name))
4798
4799
4800 class LUNodeAdd(LogicalUnit):
4801   """Logical unit for adding node to the cluster.
4802
4803   """
4804   HPATH = "node-add"
4805   HTYPE = constants.HTYPE_NODE
4806   _NFLAGS = ["master_capable", "vm_capable"]
4807
4808   def CheckArguments(self):
4809     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4810     # validate/normalize the node name
4811     self.hostname = netutils.GetHostname(name=self.op.node_name,
4812                                          family=self.primary_ip_family)
4813     self.op.node_name = self.hostname.name
4814
4815     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4816       raise errors.OpPrereqError("Cannot readd the master node",
4817                                  errors.ECODE_STATE)
4818
4819     if self.op.readd and self.op.group:
4820       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4821                                  " being readded", errors.ECODE_INVAL)
4822
4823   def BuildHooksEnv(self):
4824     """Build hooks env.
4825
4826     This will run on all nodes before, and on all nodes + the new node after.
4827
4828     """
4829     return {
4830       "OP_TARGET": self.op.node_name,
4831       "NODE_NAME": self.op.node_name,
4832       "NODE_PIP": self.op.primary_ip,
4833       "NODE_SIP": self.op.secondary_ip,
4834       "MASTER_CAPABLE": str(self.op.master_capable),
4835       "VM_CAPABLE": str(self.op.vm_capable),
4836       }
4837
4838   def BuildHooksNodes(self):
4839     """Build hooks nodes.
4840
4841     """
4842     # Exclude added node
4843     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4844     post_nodes = pre_nodes + [self.op.node_name, ]
4845
4846     return (pre_nodes, post_nodes)
4847
4848   def CheckPrereq(self):
4849     """Check prerequisites.
4850
4851     This checks:
4852      - the new node is not already in the config
4853      - it is resolvable
4854      - its parameters (single/dual homed) matches the cluster
4855
4856     Any errors are signaled by raising errors.OpPrereqError.
4857
4858     """
4859     cfg = self.cfg
4860     hostname = self.hostname
4861     node = hostname.name
4862     primary_ip = self.op.primary_ip = hostname.ip
4863     if self.op.secondary_ip is None:
4864       if self.primary_ip_family == netutils.IP6Address.family:
4865         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4866                                    " IPv4 address must be given as secondary",
4867                                    errors.ECODE_INVAL)
4868       self.op.secondary_ip = primary_ip
4869
4870     secondary_ip = self.op.secondary_ip
4871     if not netutils.IP4Address.IsValid(secondary_ip):
4872       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4873                                  " address" % secondary_ip, errors.ECODE_INVAL)
4874
4875     node_list = cfg.GetNodeList()
4876     if not self.op.readd and node in node_list:
4877       raise errors.OpPrereqError("Node %s is already in the configuration" %
4878                                  node, errors.ECODE_EXISTS)
4879     elif self.op.readd and node not in node_list:
4880       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4881                                  errors.ECODE_NOENT)
4882
4883     self.changed_primary_ip = False
4884
4885     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4886       if self.op.readd and node == existing_node_name:
4887         if existing_node.secondary_ip != secondary_ip:
4888           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4889                                      " address configuration as before",
4890                                      errors.ECODE_INVAL)
4891         if existing_node.primary_ip != primary_ip:
4892           self.changed_primary_ip = True
4893
4894         continue
4895
4896       if (existing_node.primary_ip == primary_ip or
4897           existing_node.secondary_ip == primary_ip or
4898           existing_node.primary_ip == secondary_ip or
4899           existing_node.secondary_ip == secondary_ip):
4900         raise errors.OpPrereqError("New node ip address(es) conflict with"
4901                                    " existing node %s" % existing_node.name,
4902                                    errors.ECODE_NOTUNIQUE)
4903
4904     # After this 'if' block, None is no longer a valid value for the
4905     # _capable op attributes
4906     if self.op.readd:
4907       old_node = self.cfg.GetNodeInfo(node)
4908       assert old_node is not None, "Can't retrieve locked node %s" % node
4909       for attr in self._NFLAGS:
4910         if getattr(self.op, attr) is None:
4911           setattr(self.op, attr, getattr(old_node, attr))
4912     else:
4913       for attr in self._NFLAGS:
4914         if getattr(self.op, attr) is None:
4915           setattr(self.op, attr, True)
4916
4917     if self.op.readd and not self.op.vm_capable:
4918       pri, sec = cfg.GetNodeInstances(node)
4919       if pri or sec:
4920         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4921                                    " flag set to false, but it already holds"
4922                                    " instances" % node,
4923                                    errors.ECODE_STATE)
4924
4925     # check that the type of the node (single versus dual homed) is the
4926     # same as for the master
4927     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4928     master_singlehomed = myself.secondary_ip == myself.primary_ip
4929     newbie_singlehomed = secondary_ip == primary_ip
4930     if master_singlehomed != newbie_singlehomed:
4931       if master_singlehomed:
4932         raise errors.OpPrereqError("The master has no secondary ip but the"
4933                                    " new node has one",
4934                                    errors.ECODE_INVAL)
4935       else:
4936         raise errors.OpPrereqError("The master has a secondary ip but the"
4937                                    " new node doesn't have one",
4938                                    errors.ECODE_INVAL)
4939
4940     # checks reachability
4941     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4942       raise errors.OpPrereqError("Node not reachable by ping",
4943                                  errors.ECODE_ENVIRON)
4944
4945     if not newbie_singlehomed:
4946       # check reachability from my secondary ip to newbie's secondary ip
4947       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4948                            source=myself.secondary_ip):
4949         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4950                                    " based ping to node daemon port",
4951                                    errors.ECODE_ENVIRON)
4952
4953     if self.op.readd:
4954       exceptions = [node]
4955     else:
4956       exceptions = []
4957
4958     if self.op.master_capable:
4959       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4960     else:
4961       self.master_candidate = False
4962
4963     if self.op.readd:
4964       self.new_node = old_node
4965     else:
4966       node_group = cfg.LookupNodeGroup(self.op.group)
4967       self.new_node = objects.Node(name=node,
4968                                    primary_ip=primary_ip,
4969                                    secondary_ip=secondary_ip,
4970                                    master_candidate=self.master_candidate,
4971                                    offline=False, drained=False,
4972                                    group=node_group)
4973
4974     if self.op.ndparams:
4975       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4976
4977   def Exec(self, feedback_fn):
4978     """Adds the new node to the cluster.
4979
4980     """
4981     new_node = self.new_node
4982     node = new_node.name
4983
4984     # We adding a new node so we assume it's powered
4985     new_node.powered = True
4986
4987     # for re-adds, reset the offline/drained/master-candidate flags;
4988     # we need to reset here, otherwise offline would prevent RPC calls
4989     # later in the procedure; this also means that if the re-add
4990     # fails, we are left with a non-offlined, broken node
4991     if self.op.readd:
4992       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4993       self.LogInfo("Readding a node, the offline/drained flags were reset")
4994       # if we demote the node, we do cleanup later in the procedure
4995       new_node.master_candidate = self.master_candidate
4996       if self.changed_primary_ip:
4997         new_node.primary_ip = self.op.primary_ip
4998
4999     # copy the master/vm_capable flags
5000     for attr in self._NFLAGS:
5001       setattr(new_node, attr, getattr(self.op, attr))
5002
5003     # notify the user about any possible mc promotion
5004     if new_node.master_candidate:
5005       self.LogInfo("Node will be a master candidate")
5006
5007     if self.op.ndparams:
5008       new_node.ndparams = self.op.ndparams
5009     else:
5010       new_node.ndparams = {}
5011
5012     # check connectivity
5013     result = self.rpc.call_version([node])[node]
5014     result.Raise("Can't get version information from node %s" % node)
5015     if constants.PROTOCOL_VERSION == result.payload:
5016       logging.info("Communication to node %s fine, sw version %s match",
5017                    node, result.payload)
5018     else:
5019       raise errors.OpExecError("Version mismatch master version %s,"
5020                                " node version %s" %
5021                                (constants.PROTOCOL_VERSION, result.payload))
5022
5023     # Add node to our /etc/hosts, and add key to known_hosts
5024     if self.cfg.GetClusterInfo().modify_etc_hosts:
5025       master_node = self.cfg.GetMasterNode()
5026       result = self.rpc.call_etc_hosts_modify(master_node,
5027                                               constants.ETC_HOSTS_ADD,
5028                                               self.hostname.name,
5029                                               self.hostname.ip)
5030       result.Raise("Can't update hosts file with new host data")
5031
5032     if new_node.secondary_ip != new_node.primary_ip:
5033       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5034                                False)
5035
5036     node_verify_list = [self.cfg.GetMasterNode()]
5037     node_verify_param = {
5038       constants.NV_NODELIST: [node],
5039       # TODO: do a node-net-test as well?
5040     }
5041
5042     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5043                                        self.cfg.GetClusterName())
5044     for verifier in node_verify_list:
5045       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5046       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5047       if nl_payload:
5048         for failed in nl_payload:
5049           feedback_fn("ssh/hostname verification failed"
5050                       " (checking from %s): %s" %
5051                       (verifier, nl_payload[failed]))
5052         raise errors.OpExecError("ssh/hostname verification failed")
5053
5054     if self.op.readd:
5055       _RedistributeAncillaryFiles(self)
5056       self.context.ReaddNode(new_node)
5057       # make sure we redistribute the config
5058       self.cfg.Update(new_node, feedback_fn)
5059       # and make sure the new node will not have old files around
5060       if not new_node.master_candidate:
5061         result = self.rpc.call_node_demote_from_mc(new_node.name)
5062         msg = result.fail_msg
5063         if msg:
5064           self.LogWarning("Node failed to demote itself from master"
5065                           " candidate status: %s" % msg)
5066     else:
5067       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5068                                   additional_vm=self.op.vm_capable)
5069       self.context.AddNode(new_node, self.proc.GetECId())
5070
5071
5072 class LUNodeSetParams(LogicalUnit):
5073   """Modifies the parameters of a node.
5074
5075   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5076       to the node role (as _ROLE_*)
5077   @cvar _R2F: a dictionary from node role to tuples of flags
5078   @cvar _FLAGS: a list of attribute names corresponding to the flags
5079
5080   """
5081   HPATH = "node-modify"
5082   HTYPE = constants.HTYPE_NODE
5083   REQ_BGL = False
5084   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5085   _F2R = {
5086     (True, False, False): _ROLE_CANDIDATE,
5087     (False, True, False): _ROLE_DRAINED,
5088     (False, False, True): _ROLE_OFFLINE,
5089     (False, False, False): _ROLE_REGULAR,
5090     }
5091   _R2F = dict((v, k) for k, v in _F2R.items())
5092   _FLAGS = ["master_candidate", "drained", "offline"]
5093
5094   def CheckArguments(self):
5095     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5096     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5097                 self.op.master_capable, self.op.vm_capable,
5098                 self.op.secondary_ip, self.op.ndparams]
5099     if all_mods.count(None) == len(all_mods):
5100       raise errors.OpPrereqError("Please pass at least one modification",
5101                                  errors.ECODE_INVAL)
5102     if all_mods.count(True) > 1:
5103       raise errors.OpPrereqError("Can't set the node into more than one"
5104                                  " state at the same time",
5105                                  errors.ECODE_INVAL)
5106
5107     # Boolean value that tells us whether we might be demoting from MC
5108     self.might_demote = (self.op.master_candidate == False or
5109                          self.op.offline == True or
5110                          self.op.drained == True or
5111                          self.op.master_capable == False)
5112
5113     if self.op.secondary_ip:
5114       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5115         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5116                                    " address" % self.op.secondary_ip,
5117                                    errors.ECODE_INVAL)
5118
5119     self.lock_all = self.op.auto_promote and self.might_demote
5120     self.lock_instances = self.op.secondary_ip is not None
5121
5122   def ExpandNames(self):
5123     if self.lock_all:
5124       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5125     else:
5126       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5127
5128     if self.lock_instances:
5129       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5130
5131   def DeclareLocks(self, level):
5132     # If we have locked all instances, before waiting to lock nodes, release
5133     # all the ones living on nodes unrelated to the current operation.
5134     if level == locking.LEVEL_NODE and self.lock_instances:
5135       self.affected_instances = []
5136       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5137         instances_keep = []
5138
5139         # Build list of instances to release
5140         locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5141         for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5142           if (instance.disk_template in constants.DTS_INT_MIRROR and
5143               self.op.node_name in instance.all_nodes):
5144             instances_keep.append(instance_name)
5145             self.affected_instances.append(instance)
5146
5147         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5148
5149         assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5150                 set(instances_keep))
5151
5152   def BuildHooksEnv(self):
5153     """Build hooks env.
5154
5155     This runs on the master node.
5156
5157     """
5158     return {
5159       "OP_TARGET": self.op.node_name,
5160       "MASTER_CANDIDATE": str(self.op.master_candidate),
5161       "OFFLINE": str(self.op.offline),
5162       "DRAINED": str(self.op.drained),
5163       "MASTER_CAPABLE": str(self.op.master_capable),
5164       "VM_CAPABLE": str(self.op.vm_capable),
5165       }
5166
5167   def BuildHooksNodes(self):
5168     """Build hooks nodes.
5169
5170     """
5171     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5172     return (nl, nl)
5173
5174   def CheckPrereq(self):
5175     """Check prerequisites.
5176
5177     This only checks the instance list against the existing names.
5178
5179     """
5180     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5181
5182     if (self.op.master_candidate is not None or
5183         self.op.drained is not None or
5184         self.op.offline is not None):
5185       # we can't change the master's node flags
5186       if self.op.node_name == self.cfg.GetMasterNode():
5187         raise errors.OpPrereqError("The master role can be changed"
5188                                    " only via master-failover",
5189                                    errors.ECODE_INVAL)
5190
5191     if self.op.master_candidate and not node.master_capable:
5192       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5193                                  " it a master candidate" % node.name,
5194                                  errors.ECODE_STATE)
5195
5196     if self.op.vm_capable == False:
5197       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5198       if ipri or isec:
5199         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5200                                    " the vm_capable flag" % node.name,
5201                                    errors.ECODE_STATE)
5202
5203     if node.master_candidate and self.might_demote and not self.lock_all:
5204       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5205       # check if after removing the current node, we're missing master
5206       # candidates
5207       (mc_remaining, mc_should, _) = \
5208           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5209       if mc_remaining < mc_should:
5210         raise errors.OpPrereqError("Not enough master candidates, please"
5211                                    " pass auto promote option to allow"
5212                                    " promotion", errors.ECODE_STATE)
5213
5214     self.old_flags = old_flags = (node.master_candidate,
5215                                   node.drained, node.offline)
5216     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5217     self.old_role = old_role = self._F2R[old_flags]
5218
5219     # Check for ineffective changes
5220     for attr in self._FLAGS:
5221       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5222         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5223         setattr(self.op, attr, None)
5224
5225     # Past this point, any flag change to False means a transition
5226     # away from the respective state, as only real changes are kept
5227
5228     # TODO: We might query the real power state if it supports OOB
5229     if _SupportsOob(self.cfg, node):
5230       if self.op.offline is False and not (node.powered or
5231                                            self.op.powered == True):
5232         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5233                                     " offline status can be reset") %
5234                                    self.op.node_name)
5235     elif self.op.powered is not None:
5236       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5237                                   " as it does not support out-of-band"
5238                                   " handling") % self.op.node_name)
5239
5240     # If we're being deofflined/drained, we'll MC ourself if needed
5241     if (self.op.drained == False or self.op.offline == False or
5242         (self.op.master_capable and not node.master_capable)):
5243       if _DecideSelfPromotion(self):
5244         self.op.master_candidate = True
5245         self.LogInfo("Auto-promoting node to master candidate")
5246
5247     # If we're no longer master capable, we'll demote ourselves from MC
5248     if self.op.master_capable == False and node.master_candidate:
5249       self.LogInfo("Demoting from master candidate")
5250       self.op.master_candidate = False
5251
5252     # Compute new role
5253     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5254     if self.op.master_candidate:
5255       new_role = self._ROLE_CANDIDATE
5256     elif self.op.drained:
5257       new_role = self._ROLE_DRAINED
5258     elif self.op.offline:
5259       new_role = self._ROLE_OFFLINE
5260     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5261       # False is still in new flags, which means we're un-setting (the
5262       # only) True flag
5263       new_role = self._ROLE_REGULAR
5264     else: # no new flags, nothing, keep old role
5265       new_role = old_role
5266
5267     self.new_role = new_role
5268
5269     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5270       # Trying to transition out of offline status
5271       result = self.rpc.call_version([node.name])[node.name]
5272       if result.fail_msg:
5273         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5274                                    " to report its version: %s" %
5275                                    (node.name, result.fail_msg),
5276                                    errors.ECODE_STATE)
5277       else:
5278         self.LogWarning("Transitioning node from offline to online state"
5279                         " without using re-add. Please make sure the node"
5280                         " is healthy!")
5281
5282     if self.op.secondary_ip:
5283       # Ok even without locking, because this can't be changed by any LU
5284       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5285       master_singlehomed = master.secondary_ip == master.primary_ip
5286       if master_singlehomed and self.op.secondary_ip:
5287         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5288                                    " homed cluster", errors.ECODE_INVAL)
5289
5290       if node.offline:
5291         if self.affected_instances:
5292           raise errors.OpPrereqError("Cannot change secondary ip: offline"
5293                                      " node has instances (%s) configured"
5294                                      " to use it" % self.affected_instances)
5295       else:
5296         # On online nodes, check that no instances are running, and that
5297         # the node has the new ip and we can reach it.
5298         for instance in self.affected_instances:
5299           _CheckInstanceDown(self, instance, "cannot change secondary ip")
5300
5301         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5302         if master.name != node.name:
5303           # check reachability from master secondary ip to new secondary ip
5304           if not netutils.TcpPing(self.op.secondary_ip,
5305                                   constants.DEFAULT_NODED_PORT,
5306                                   source=master.secondary_ip):
5307             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5308                                        " based ping to node daemon port",
5309                                        errors.ECODE_ENVIRON)
5310
5311     if self.op.ndparams:
5312       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5313       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5314       self.new_ndparams = new_ndparams
5315
5316   def Exec(self, feedback_fn):
5317     """Modifies a node.
5318
5319     """
5320     node = self.node
5321     old_role = self.old_role
5322     new_role = self.new_role
5323
5324     result = []
5325
5326     if self.op.ndparams:
5327       node.ndparams = self.new_ndparams
5328
5329     if self.op.powered is not None:
5330       node.powered = self.op.powered
5331
5332     for attr in ["master_capable", "vm_capable"]:
5333       val = getattr(self.op, attr)
5334       if val is not None:
5335         setattr(node, attr, val)
5336         result.append((attr, str(val)))
5337
5338     if new_role != old_role:
5339       # Tell the node to demote itself, if no longer MC and not offline
5340       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5341         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5342         if msg:
5343           self.LogWarning("Node failed to demote itself: %s", msg)
5344
5345       new_flags = self._R2F[new_role]
5346       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5347         if of != nf:
5348           result.append((desc, str(nf)))
5349       (node.master_candidate, node.drained, node.offline) = new_flags
5350
5351       # we locked all nodes, we adjust the CP before updating this node
5352       if self.lock_all:
5353         _AdjustCandidatePool(self, [node.name])
5354
5355     if self.op.secondary_ip:
5356       node.secondary_ip = self.op.secondary_ip
5357       result.append(("secondary_ip", self.op.secondary_ip))
5358
5359     # this will trigger configuration file update, if needed
5360     self.cfg.Update(node, feedback_fn)
5361
5362     # this will trigger job queue propagation or cleanup if the mc
5363     # flag changed
5364     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5365       self.context.ReaddNode(node)
5366
5367     return result
5368
5369
5370 class LUNodePowercycle(NoHooksLU):
5371   """Powercycles a node.
5372
5373   """
5374   REQ_BGL = False
5375
5376   def CheckArguments(self):
5377     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5378     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5379       raise errors.OpPrereqError("The node is the master and the force"
5380                                  " parameter was not set",
5381                                  errors.ECODE_INVAL)
5382
5383   def ExpandNames(self):
5384     """Locking for PowercycleNode.
5385
5386     This is a last-resort option and shouldn't block on other
5387     jobs. Therefore, we grab no locks.
5388
5389     """
5390     self.needed_locks = {}
5391
5392   def Exec(self, feedback_fn):
5393     """Reboots a node.
5394
5395     """
5396     result = self.rpc.call_node_powercycle(self.op.node_name,
5397                                            self.cfg.GetHypervisorType())
5398     result.Raise("Failed to schedule the reboot")
5399     return result.payload
5400
5401
5402 class LUClusterQuery(NoHooksLU):
5403   """Query cluster configuration.
5404
5405   """
5406   REQ_BGL = False
5407
5408   def ExpandNames(self):
5409     self.needed_locks = {}
5410
5411   def Exec(self, feedback_fn):
5412     """Return cluster config.
5413
5414     """
5415     cluster = self.cfg.GetClusterInfo()
5416     os_hvp = {}
5417
5418     # Filter just for enabled hypervisors
5419     for os_name, hv_dict in cluster.os_hvp.items():
5420       os_hvp[os_name] = {}
5421       for hv_name, hv_params in hv_dict.items():
5422         if hv_name in cluster.enabled_hypervisors:
5423           os_hvp[os_name][hv_name] = hv_params
5424
5425     # Convert ip_family to ip_version
5426     primary_ip_version = constants.IP4_VERSION
5427     if cluster.primary_ip_family == netutils.IP6Address.family:
5428       primary_ip_version = constants.IP6_VERSION
5429
5430     result = {
5431       "software_version": constants.RELEASE_VERSION,
5432       "protocol_version": constants.PROTOCOL_VERSION,
5433       "config_version": constants.CONFIG_VERSION,
5434       "os_api_version": max(constants.OS_API_VERSIONS),
5435       "export_version": constants.EXPORT_VERSION,
5436       "architecture": (platform.architecture()[0], platform.machine()),
5437       "name": cluster.cluster_name,
5438       "master": cluster.master_node,
5439       "default_hypervisor": cluster.enabled_hypervisors[0],
5440       "enabled_hypervisors": cluster.enabled_hypervisors,
5441       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5442                         for hypervisor_name in cluster.enabled_hypervisors]),
5443       "os_hvp": os_hvp,
5444       "beparams": cluster.beparams,
5445       "osparams": cluster.osparams,
5446       "nicparams": cluster.nicparams,
5447       "ndparams": cluster.ndparams,
5448       "candidate_pool_size": cluster.candidate_pool_size,
5449       "master_netdev": cluster.master_netdev,
5450       "volume_group_name": cluster.volume_group_name,
5451       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5452       "file_storage_dir": cluster.file_storage_dir,
5453       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5454       "maintain_node_health": cluster.maintain_node_health,
5455       "ctime": cluster.ctime,
5456       "mtime": cluster.mtime,
5457       "uuid": cluster.uuid,
5458       "tags": list(cluster.GetTags()),
5459       "uid_pool": cluster.uid_pool,
5460       "default_iallocator": cluster.default_iallocator,
5461       "reserved_lvs": cluster.reserved_lvs,
5462       "primary_ip_version": primary_ip_version,
5463       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5464       "hidden_os": cluster.hidden_os,
5465       "blacklisted_os": cluster.blacklisted_os,
5466       }
5467
5468     return result
5469
5470
5471 class LUClusterConfigQuery(NoHooksLU):
5472   """Return configuration values.
5473
5474   """
5475   REQ_BGL = False
5476   _FIELDS_DYNAMIC = utils.FieldSet()
5477   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5478                                   "watcher_pause", "volume_group_name")
5479
5480   def CheckArguments(self):
5481     _CheckOutputFields(static=self._FIELDS_STATIC,
5482                        dynamic=self._FIELDS_DYNAMIC,
5483                        selected=self.op.output_fields)
5484
5485   def ExpandNames(self):
5486     self.needed_locks = {}
5487
5488   def Exec(self, feedback_fn):
5489     """Dump a representation of the cluster config to the standard output.
5490
5491     """
5492     values = []
5493     for field in self.op.output_fields:
5494       if field == "cluster_name":
5495         entry = self.cfg.GetClusterName()
5496       elif field == "master_node":
5497         entry = self.cfg.GetMasterNode()
5498       elif field == "drain_flag":
5499         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5500       elif field == "watcher_pause":
5501         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5502       elif field == "volume_group_name":
5503         entry = self.cfg.GetVGName()
5504       else:
5505         raise errors.ParameterError(field)
5506       values.append(entry)
5507     return values
5508
5509
5510 class LUInstanceActivateDisks(NoHooksLU):
5511   """Bring up an instance's disks.
5512
5513   """
5514   REQ_BGL = False
5515
5516   def ExpandNames(self):
5517     self._ExpandAndLockInstance()
5518     self.needed_locks[locking.LEVEL_NODE] = []
5519     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5520
5521   def DeclareLocks(self, level):
5522     if level == locking.LEVEL_NODE:
5523       self._LockInstancesNodes()
5524
5525   def CheckPrereq(self):
5526     """Check prerequisites.
5527
5528     This checks that the instance is in the cluster.
5529
5530     """
5531     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5532     assert self.instance is not None, \
5533       "Cannot retrieve locked instance %s" % self.op.instance_name
5534     _CheckNodeOnline(self, self.instance.primary_node)
5535
5536   def Exec(self, feedback_fn):
5537     """Activate the disks.
5538
5539     """
5540     disks_ok, disks_info = \
5541               _AssembleInstanceDisks(self, self.instance,
5542                                      ignore_size=self.op.ignore_size)
5543     if not disks_ok:
5544       raise errors.OpExecError("Cannot activate block devices")
5545
5546     return disks_info
5547
5548
5549 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5550                            ignore_size=False):
5551   """Prepare the block devices for an instance.
5552
5553   This sets up the block devices on all nodes.
5554
5555   @type lu: L{LogicalUnit}
5556   @param lu: the logical unit on whose behalf we execute
5557   @type instance: L{objects.Instance}
5558   @param instance: the instance for whose disks we assemble
5559   @type disks: list of L{objects.Disk} or None
5560   @param disks: which disks to assemble (or all, if None)
5561   @type ignore_secondaries: boolean
5562   @param ignore_secondaries: if true, errors on secondary nodes
5563       won't result in an error return from the function
5564   @type ignore_size: boolean
5565   @param ignore_size: if true, the current known size of the disk
5566       will not be used during the disk activation, useful for cases
5567       when the size is wrong
5568   @return: False if the operation failed, otherwise a list of
5569       (host, instance_visible_name, node_visible_name)
5570       with the mapping from node devices to instance devices
5571
5572   """
5573   device_info = []
5574   disks_ok = True
5575   iname = instance.name
5576   disks = _ExpandCheckDisks(instance, disks)
5577
5578   # With the two passes mechanism we try to reduce the window of
5579   # opportunity for the race condition of switching DRBD to primary
5580   # before handshaking occured, but we do not eliminate it
5581
5582   # The proper fix would be to wait (with some limits) until the
5583   # connection has been made and drbd transitions from WFConnection
5584   # into any other network-connected state (Connected, SyncTarget,
5585   # SyncSource, etc.)
5586
5587   # 1st pass, assemble on all nodes in secondary mode
5588   for idx, inst_disk in enumerate(disks):
5589     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5590       if ignore_size:
5591         node_disk = node_disk.Copy()
5592         node_disk.UnsetSize()
5593       lu.cfg.SetDiskID(node_disk, node)
5594       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5595       msg = result.fail_msg
5596       if msg:
5597         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5598                            " (is_primary=False, pass=1): %s",
5599                            inst_disk.iv_name, node, msg)
5600         if not ignore_secondaries:
5601           disks_ok = False
5602
5603   # FIXME: race condition on drbd migration to primary
5604
5605   # 2nd pass, do only the primary node
5606   for idx, inst_disk in enumerate(disks):
5607     dev_path = None
5608
5609     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5610       if node != instance.primary_node:
5611         continue
5612       if ignore_size:
5613         node_disk = node_disk.Copy()
5614         node_disk.UnsetSize()
5615       lu.cfg.SetDiskID(node_disk, node)
5616       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5617       msg = result.fail_msg
5618       if msg:
5619         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5620                            " (is_primary=True, pass=2): %s",
5621                            inst_disk.iv_name, node, msg)
5622         disks_ok = False
5623       else:
5624         dev_path = result.payload
5625
5626     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5627
5628   # leave the disks configured for the primary node
5629   # this is a workaround that would be fixed better by
5630   # improving the logical/physical id handling
5631   for disk in disks:
5632     lu.cfg.SetDiskID(disk, instance.primary_node)
5633
5634   return disks_ok, device_info
5635
5636
5637 def _StartInstanceDisks(lu, instance, force):
5638   """Start the disks of an instance.
5639
5640   """
5641   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5642                                            ignore_secondaries=force)
5643   if not disks_ok:
5644     _ShutdownInstanceDisks(lu, instance)
5645     if force is not None and not force:
5646       lu.proc.LogWarning("", hint="If the message above refers to a"
5647                          " secondary node,"
5648                          " you can retry the operation using '--force'.")
5649     raise errors.OpExecError("Disk consistency error")
5650
5651
5652 class LUInstanceDeactivateDisks(NoHooksLU):
5653   """Shutdown an instance's disks.
5654
5655   """
5656   REQ_BGL = False
5657
5658   def ExpandNames(self):
5659     self._ExpandAndLockInstance()
5660     self.needed_locks[locking.LEVEL_NODE] = []
5661     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5662
5663   def DeclareLocks(self, level):
5664     if level == locking.LEVEL_NODE:
5665       self._LockInstancesNodes()
5666
5667   def CheckPrereq(self):
5668     """Check prerequisites.
5669
5670     This checks that the instance is in the cluster.
5671
5672     """
5673     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5674     assert self.instance is not None, \
5675       "Cannot retrieve locked instance %s" % self.op.instance_name
5676
5677   def Exec(self, feedback_fn):
5678     """Deactivate the disks
5679
5680     """
5681     instance = self.instance
5682     if self.op.force:
5683       _ShutdownInstanceDisks(self, instance)
5684     else:
5685       _SafeShutdownInstanceDisks(self, instance)
5686
5687
5688 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5689   """Shutdown block devices of an instance.
5690
5691   This function checks if an instance is running, before calling
5692   _ShutdownInstanceDisks.
5693
5694   """
5695   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5696   _ShutdownInstanceDisks(lu, instance, disks=disks)
5697
5698
5699 def _ExpandCheckDisks(instance, disks):
5700   """Return the instance disks selected by the disks list
5701
5702   @type disks: list of L{objects.Disk} or None
5703   @param disks: selected disks
5704   @rtype: list of L{objects.Disk}
5705   @return: selected instance disks to act on
5706
5707   """
5708   if disks is None:
5709     return instance.disks
5710   else:
5711     if not set(disks).issubset(instance.disks):
5712       raise errors.ProgrammerError("Can only act on disks belonging to the"
5713                                    " target instance")
5714     return disks
5715
5716
5717 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5718   """Shutdown block devices of an instance.
5719
5720   This does the shutdown on all nodes of the instance.
5721
5722   If the ignore_primary is false, errors on the primary node are
5723   ignored.
5724
5725   """
5726   all_result = True
5727   disks = _ExpandCheckDisks(instance, disks)
5728
5729   for disk in disks:
5730     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5731       lu.cfg.SetDiskID(top_disk, node)
5732       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5733       msg = result.fail_msg
5734       if msg:
5735         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5736                       disk.iv_name, node, msg)
5737         if ((node == instance.primary_node and not ignore_primary) or
5738             (node != instance.primary_node and not result.offline)):
5739           all_result = False
5740   return all_result
5741
5742
5743 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5744   """Checks if a node has enough free memory.
5745
5746   This function check if a given node has the needed amount of free
5747   memory. In case the node has less memory or we cannot get the
5748   information from the node, this function raise an OpPrereqError
5749   exception.
5750
5751   @type lu: C{LogicalUnit}
5752   @param lu: a logical unit from which we get configuration data
5753   @type node: C{str}
5754   @param node: the node to check
5755   @type reason: C{str}
5756   @param reason: string to use in the error message
5757   @type requested: C{int}
5758   @param requested: the amount of memory in MiB to check for
5759   @type hypervisor_name: C{str}
5760   @param hypervisor_name: the hypervisor to ask for memory stats
5761   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5762       we cannot check the node
5763
5764   """
5765   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5766   nodeinfo[node].Raise("Can't get data from node %s" % node,
5767                        prereq=True, ecode=errors.ECODE_ENVIRON)
5768   free_mem = nodeinfo[node].payload.get("memory_free", None)
5769   if not isinstance(free_mem, int):
5770     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5771                                " was '%s'" % (node, free_mem),
5772                                errors.ECODE_ENVIRON)
5773   if requested > free_mem:
5774     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5775                                " needed %s MiB, available %s MiB" %
5776                                (node, reason, requested, free_mem),
5777                                errors.ECODE_NORES)
5778
5779
5780 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5781   """Checks if nodes have enough free disk space in the all VGs.
5782
5783   This function check if all given nodes have the needed amount of
5784   free disk. In case any node has less disk or we cannot get the
5785   information from the node, this function raise an OpPrereqError
5786   exception.
5787
5788   @type lu: C{LogicalUnit}
5789   @param lu: a logical unit from which we get configuration data
5790   @type nodenames: C{list}
5791   @param nodenames: the list of node names to check
5792   @type req_sizes: C{dict}
5793   @param req_sizes: the hash of vg and corresponding amount of disk in
5794       MiB to check for
5795   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5796       or we cannot check the node
5797
5798   """
5799   for vg, req_size in req_sizes.items():
5800     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5801
5802
5803 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5804   """Checks if nodes have enough free disk space in the specified VG.
5805
5806   This function check if all given nodes have the needed amount of
5807   free disk. In case any node has less disk or we cannot get the
5808   information from the node, this function raise an OpPrereqError
5809   exception.
5810
5811   @type lu: C{LogicalUnit}
5812   @param lu: a logical unit from which we get configuration data
5813   @type nodenames: C{list}
5814   @param nodenames: the list of node names to check
5815   @type vg: C{str}
5816   @param vg: the volume group to check
5817   @type requested: C{int}
5818   @param requested: the amount of disk in MiB to check for
5819   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5820       or we cannot check the node
5821
5822   """
5823   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5824   for node in nodenames:
5825     info = nodeinfo[node]
5826     info.Raise("Cannot get current information from node %s" % node,
5827                prereq=True, ecode=errors.ECODE_ENVIRON)
5828     vg_free = info.payload.get("vg_free", None)
5829     if not isinstance(vg_free, int):
5830       raise errors.OpPrereqError("Can't compute free disk space on node"
5831                                  " %s for vg %s, result was '%s'" %
5832                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5833     if requested > vg_free:
5834       raise errors.OpPrereqError("Not enough disk space on target node %s"
5835                                  " vg %s: required %d MiB, available %d MiB" %
5836                                  (node, vg, requested, vg_free),
5837                                  errors.ECODE_NORES)
5838
5839
5840 class LUInstanceStartup(LogicalUnit):
5841   """Starts an instance.
5842
5843   """
5844   HPATH = "instance-start"
5845   HTYPE = constants.HTYPE_INSTANCE
5846   REQ_BGL = False
5847
5848   def CheckArguments(self):
5849     # extra beparams
5850     if self.op.beparams:
5851       # fill the beparams dict
5852       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5853
5854   def ExpandNames(self):
5855     self._ExpandAndLockInstance()
5856
5857   def BuildHooksEnv(self):
5858     """Build hooks env.
5859
5860     This runs on master, primary and secondary nodes of the instance.
5861
5862     """
5863     env = {
5864       "FORCE": self.op.force,
5865       }
5866
5867     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5868
5869     return env
5870
5871   def BuildHooksNodes(self):
5872     """Build hooks nodes.
5873
5874     """
5875     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5876     return (nl, nl)
5877
5878   def CheckPrereq(self):
5879     """Check prerequisites.
5880
5881     This checks that the instance is in the cluster.
5882
5883     """
5884     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5885     assert self.instance is not None, \
5886       "Cannot retrieve locked instance %s" % self.op.instance_name
5887
5888     # extra hvparams
5889     if self.op.hvparams:
5890       # check hypervisor parameter syntax (locally)
5891       cluster = self.cfg.GetClusterInfo()
5892       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5893       filled_hvp = cluster.FillHV(instance)
5894       filled_hvp.update(self.op.hvparams)
5895       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5896       hv_type.CheckParameterSyntax(filled_hvp)
5897       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5898
5899     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5900
5901     if self.primary_offline and self.op.ignore_offline_nodes:
5902       self.proc.LogWarning("Ignoring offline primary node")
5903
5904       if self.op.hvparams or self.op.beparams:
5905         self.proc.LogWarning("Overridden parameters are ignored")
5906     else:
5907       _CheckNodeOnline(self, instance.primary_node)
5908
5909       bep = self.cfg.GetClusterInfo().FillBE(instance)
5910
5911       # check bridges existence
5912       _CheckInstanceBridgesExist(self, instance)
5913
5914       remote_info = self.rpc.call_instance_info(instance.primary_node,
5915                                                 instance.name,
5916                                                 instance.hypervisor)
5917       remote_info.Raise("Error checking node %s" % instance.primary_node,
5918                         prereq=True, ecode=errors.ECODE_ENVIRON)
5919       if not remote_info.payload: # not running already
5920         _CheckNodeFreeMemory(self, instance.primary_node,
5921                              "starting instance %s" % instance.name,
5922                              bep[constants.BE_MEMORY], instance.hypervisor)
5923
5924   def Exec(self, feedback_fn):
5925     """Start the instance.
5926
5927     """
5928     instance = self.instance
5929     force = self.op.force
5930
5931     if not self.op.no_remember:
5932       self.cfg.MarkInstanceUp(instance.name)
5933
5934     if self.primary_offline:
5935       assert self.op.ignore_offline_nodes
5936       self.proc.LogInfo("Primary node offline, marked instance as started")
5937     else:
5938       node_current = instance.primary_node
5939
5940       _StartInstanceDisks(self, instance, force)
5941
5942       result = self.rpc.call_instance_start(node_current, instance,
5943                                             self.op.hvparams, self.op.beparams,
5944                                             self.op.startup_paused)
5945       msg = result.fail_msg
5946       if msg:
5947         _ShutdownInstanceDisks(self, instance)
5948         raise errors.OpExecError("Could not start instance: %s" % msg)
5949
5950
5951 class LUInstanceReboot(LogicalUnit):
5952   """Reboot an instance.
5953
5954   """
5955   HPATH = "instance-reboot"
5956   HTYPE = constants.HTYPE_INSTANCE
5957   REQ_BGL = False
5958
5959   def ExpandNames(self):
5960     self._ExpandAndLockInstance()
5961
5962   def BuildHooksEnv(self):
5963     """Build hooks env.
5964
5965     This runs on master, primary and secondary nodes of the instance.
5966
5967     """
5968     env = {
5969       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5970       "REBOOT_TYPE": self.op.reboot_type,
5971       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5972       }
5973
5974     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5975
5976     return env
5977
5978   def BuildHooksNodes(self):
5979     """Build hooks nodes.
5980
5981     """
5982     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5983     return (nl, nl)
5984
5985   def CheckPrereq(self):
5986     """Check prerequisites.
5987
5988     This checks that the instance is in the cluster.
5989
5990     """
5991     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5992     assert self.instance is not None, \
5993       "Cannot retrieve locked instance %s" % self.op.instance_name
5994
5995     _CheckNodeOnline(self, instance.primary_node)
5996
5997     # check bridges existence
5998     _CheckInstanceBridgesExist(self, instance)
5999
6000   def Exec(self, feedback_fn):
6001     """Reboot the instance.
6002
6003     """
6004     instance = self.instance
6005     ignore_secondaries = self.op.ignore_secondaries
6006     reboot_type = self.op.reboot_type
6007
6008     remote_info = self.rpc.call_instance_info(instance.primary_node,
6009                                               instance.name,
6010                                               instance.hypervisor)
6011     remote_info.Raise("Error checking node %s" % instance.primary_node)
6012     instance_running = bool(remote_info.payload)
6013
6014     node_current = instance.primary_node
6015
6016     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6017                                             constants.INSTANCE_REBOOT_HARD]:
6018       for disk in instance.disks:
6019         self.cfg.SetDiskID(disk, node_current)
6020       result = self.rpc.call_instance_reboot(node_current, instance,
6021                                              reboot_type,
6022                                              self.op.shutdown_timeout)
6023       result.Raise("Could not reboot instance")
6024     else:
6025       if instance_running:
6026         result = self.rpc.call_instance_shutdown(node_current, instance,
6027                                                  self.op.shutdown_timeout)
6028         result.Raise("Could not shutdown instance for full reboot")
6029         _ShutdownInstanceDisks(self, instance)
6030       else:
6031         self.LogInfo("Instance %s was already stopped, starting now",
6032                      instance.name)
6033       _StartInstanceDisks(self, instance, ignore_secondaries)
6034       result = self.rpc.call_instance_start(node_current, instance,
6035                                             None, None, False)
6036       msg = result.fail_msg
6037       if msg:
6038         _ShutdownInstanceDisks(self, instance)
6039         raise errors.OpExecError("Could not start instance for"
6040                                  " full reboot: %s" % msg)
6041
6042     self.cfg.MarkInstanceUp(instance.name)
6043
6044
6045 class LUInstanceShutdown(LogicalUnit):
6046   """Shutdown an instance.
6047
6048   """
6049   HPATH = "instance-stop"
6050   HTYPE = constants.HTYPE_INSTANCE
6051   REQ_BGL = False
6052
6053   def ExpandNames(self):
6054     self._ExpandAndLockInstance()
6055
6056   def BuildHooksEnv(self):
6057     """Build hooks env.
6058
6059     This runs on master, primary and secondary nodes of the instance.
6060
6061     """
6062     env = _BuildInstanceHookEnvByObject(self, self.instance)
6063     env["TIMEOUT"] = self.op.timeout
6064     return env
6065
6066   def BuildHooksNodes(self):
6067     """Build hooks nodes.
6068
6069     """
6070     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6071     return (nl, nl)
6072
6073   def CheckPrereq(self):
6074     """Check prerequisites.
6075
6076     This checks that the instance is in the cluster.
6077
6078     """
6079     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6080     assert self.instance is not None, \
6081       "Cannot retrieve locked instance %s" % self.op.instance_name
6082
6083     self.primary_offline = \
6084       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6085
6086     if self.primary_offline and self.op.ignore_offline_nodes:
6087       self.proc.LogWarning("Ignoring offline primary node")
6088     else:
6089       _CheckNodeOnline(self, self.instance.primary_node)
6090
6091   def Exec(self, feedback_fn):
6092     """Shutdown the instance.
6093
6094     """
6095     instance = self.instance
6096     node_current = instance.primary_node
6097     timeout = self.op.timeout
6098
6099     if not self.op.no_remember:
6100       self.cfg.MarkInstanceDown(instance.name)
6101
6102     if self.primary_offline:
6103       assert self.op.ignore_offline_nodes
6104       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6105     else:
6106       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6107       msg = result.fail_msg
6108       if msg:
6109         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6110
6111       _ShutdownInstanceDisks(self, instance)
6112
6113
6114 class LUInstanceReinstall(LogicalUnit):
6115   """Reinstall an instance.
6116
6117   """
6118   HPATH = "instance-reinstall"
6119   HTYPE = constants.HTYPE_INSTANCE
6120   REQ_BGL = False
6121
6122   def ExpandNames(self):
6123     self._ExpandAndLockInstance()
6124
6125   def BuildHooksEnv(self):
6126     """Build hooks env.
6127
6128     This runs on master, primary and secondary nodes of the instance.
6129
6130     """
6131     return _BuildInstanceHookEnvByObject(self, self.instance)
6132
6133   def BuildHooksNodes(self):
6134     """Build hooks nodes.
6135
6136     """
6137     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6138     return (nl, nl)
6139
6140   def CheckPrereq(self):
6141     """Check prerequisites.
6142
6143     This checks that the instance is in the cluster and is not running.
6144
6145     """
6146     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6147     assert instance is not None, \
6148       "Cannot retrieve locked instance %s" % self.op.instance_name
6149     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6150                      " offline, cannot reinstall")
6151     for node in instance.secondary_nodes:
6152       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6153                        " cannot reinstall")
6154
6155     if instance.disk_template == constants.DT_DISKLESS:
6156       raise errors.OpPrereqError("Instance '%s' has no disks" %
6157                                  self.op.instance_name,
6158                                  errors.ECODE_INVAL)
6159     _CheckInstanceDown(self, instance, "cannot reinstall")
6160
6161     if self.op.os_type is not None:
6162       # OS verification
6163       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6164       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6165       instance_os = self.op.os_type
6166     else:
6167       instance_os = instance.os
6168
6169     nodelist = list(instance.all_nodes)
6170
6171     if self.op.osparams:
6172       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6173       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6174       self.os_inst = i_osdict # the new dict (without defaults)
6175     else:
6176       self.os_inst = None
6177
6178     self.instance = instance
6179
6180   def Exec(self, feedback_fn):
6181     """Reinstall the instance.
6182
6183     """
6184     inst = self.instance
6185
6186     if self.op.os_type is not None:
6187       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6188       inst.os = self.op.os_type
6189       # Write to configuration
6190       self.cfg.Update(inst, feedback_fn)
6191
6192     _StartInstanceDisks(self, inst, None)
6193     try:
6194       feedback_fn("Running the instance OS create scripts...")
6195       # FIXME: pass debug option from opcode to backend
6196       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6197                                              self.op.debug_level,
6198                                              osparams=self.os_inst)
6199       result.Raise("Could not install OS for instance %s on node %s" %
6200                    (inst.name, inst.primary_node))
6201     finally:
6202       _ShutdownInstanceDisks(self, inst)
6203
6204
6205 class LUInstanceRecreateDisks(LogicalUnit):
6206   """Recreate an instance's missing disks.
6207
6208   """
6209   HPATH = "instance-recreate-disks"
6210   HTYPE = constants.HTYPE_INSTANCE
6211   REQ_BGL = False
6212
6213   def CheckArguments(self):
6214     # normalise the disk list
6215     self.op.disks = sorted(frozenset(self.op.disks))
6216
6217   def ExpandNames(self):
6218     self._ExpandAndLockInstance()
6219     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6220     if self.op.nodes:
6221       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6222       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6223     else:
6224       self.needed_locks[locking.LEVEL_NODE] = []
6225
6226   def DeclareLocks(self, level):
6227     if level == locking.LEVEL_NODE:
6228       # if we replace the nodes, we only need to lock the old primary,
6229       # otherwise we need to lock all nodes for disk re-creation
6230       primary_only = bool(self.op.nodes)
6231       self._LockInstancesNodes(primary_only=primary_only)
6232
6233   def BuildHooksEnv(self):
6234     """Build hooks env.
6235
6236     This runs on master, primary and secondary nodes of the instance.
6237
6238     """
6239     return _BuildInstanceHookEnvByObject(self, self.instance)
6240
6241   def BuildHooksNodes(self):
6242     """Build hooks nodes.
6243
6244     """
6245     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6246     return (nl, nl)
6247
6248   def CheckPrereq(self):
6249     """Check prerequisites.
6250
6251     This checks that the instance is in the cluster and is not running.
6252
6253     """
6254     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6255     assert instance is not None, \
6256       "Cannot retrieve locked instance %s" % self.op.instance_name
6257     if self.op.nodes:
6258       if len(self.op.nodes) != len(instance.all_nodes):
6259         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6260                                    " %d replacement nodes were specified" %
6261                                    (instance.name, len(instance.all_nodes),
6262                                     len(self.op.nodes)),
6263                                    errors.ECODE_INVAL)
6264       assert instance.disk_template != constants.DT_DRBD8 or \
6265           len(self.op.nodes) == 2
6266       assert instance.disk_template != constants.DT_PLAIN or \
6267           len(self.op.nodes) == 1
6268       primary_node = self.op.nodes[0]
6269     else:
6270       primary_node = instance.primary_node
6271     _CheckNodeOnline(self, primary_node)
6272
6273     if instance.disk_template == constants.DT_DISKLESS:
6274       raise errors.OpPrereqError("Instance '%s' has no disks" %
6275                                  self.op.instance_name, errors.ECODE_INVAL)
6276     # if we replace nodes *and* the old primary is offline, we don't
6277     # check
6278     assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6279     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6280     if not (self.op.nodes and old_pnode.offline):
6281       _CheckInstanceDown(self, instance, "cannot recreate disks")
6282
6283     if not self.op.disks:
6284       self.op.disks = range(len(instance.disks))
6285     else:
6286       for idx in self.op.disks:
6287         if idx >= len(instance.disks):
6288           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6289                                      errors.ECODE_INVAL)
6290     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6291       raise errors.OpPrereqError("Can't recreate disks partially and"
6292                                  " change the nodes at the same time",
6293                                  errors.ECODE_INVAL)
6294     self.instance = instance
6295
6296   def Exec(self, feedback_fn):
6297     """Recreate the disks.
6298
6299     """
6300     instance = self.instance
6301
6302     to_skip = []
6303     mods = [] # keeps track of needed logical_id changes
6304
6305     for idx, disk in enumerate(instance.disks):
6306       if idx not in self.op.disks: # disk idx has not been passed in
6307         to_skip.append(idx)
6308         continue
6309       # update secondaries for disks, if needed
6310       if self.op.nodes:
6311         if disk.dev_type == constants.LD_DRBD8:
6312           # need to update the nodes and minors
6313           assert len(self.op.nodes) == 2
6314           assert len(disk.logical_id) == 6 # otherwise disk internals
6315                                            # have changed
6316           (_, _, old_port, _, _, old_secret) = disk.logical_id
6317           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6318           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6319                     new_minors[0], new_minors[1], old_secret)
6320           assert len(disk.logical_id) == len(new_id)
6321           mods.append((idx, new_id))
6322
6323     # now that we have passed all asserts above, we can apply the mods
6324     # in a single run (to avoid partial changes)
6325     for idx, new_id in mods:
6326       instance.disks[idx].logical_id = new_id
6327
6328     # change primary node, if needed
6329     if self.op.nodes:
6330       instance.primary_node = self.op.nodes[0]
6331       self.LogWarning("Changing the instance's nodes, you will have to"
6332                       " remove any disks left on the older nodes manually")
6333
6334     if self.op.nodes:
6335       self.cfg.Update(instance, feedback_fn)
6336
6337     _CreateDisks(self, instance, to_skip=to_skip)
6338
6339
6340 class LUInstanceRename(LogicalUnit):
6341   """Rename an instance.
6342
6343   """
6344   HPATH = "instance-rename"
6345   HTYPE = constants.HTYPE_INSTANCE
6346
6347   def CheckArguments(self):
6348     """Check arguments.
6349
6350     """
6351     if self.op.ip_check and not self.op.name_check:
6352       # TODO: make the ip check more flexible and not depend on the name check
6353       raise errors.OpPrereqError("IP address check requires a name check",
6354                                  errors.ECODE_INVAL)
6355
6356   def BuildHooksEnv(self):
6357     """Build hooks env.
6358
6359     This runs on master, primary and secondary nodes of the instance.
6360
6361     """
6362     env = _BuildInstanceHookEnvByObject(self, self.instance)
6363     env["INSTANCE_NEW_NAME"] = self.op.new_name
6364     return env
6365
6366   def BuildHooksNodes(self):
6367     """Build hooks nodes.
6368
6369     """
6370     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6371     return (nl, nl)
6372
6373   def CheckPrereq(self):
6374     """Check prerequisites.
6375
6376     This checks that the instance is in the cluster and is not running.
6377
6378     """
6379     self.op.instance_name = _ExpandInstanceName(self.cfg,
6380                                                 self.op.instance_name)
6381     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6382     assert instance is not None
6383     _CheckNodeOnline(self, instance.primary_node)
6384     _CheckInstanceDown(self, instance, "cannot rename")
6385     self.instance = instance
6386
6387     new_name = self.op.new_name
6388     if self.op.name_check:
6389       hostname = netutils.GetHostname(name=new_name)
6390       if hostname != new_name:
6391         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6392                      hostname.name)
6393       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6394         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6395                                     " same as given hostname '%s'") %
6396                                     (hostname.name, self.op.new_name),
6397                                     errors.ECODE_INVAL)
6398       new_name = self.op.new_name = hostname.name
6399       if (self.op.ip_check and
6400           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6401         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6402                                    (hostname.ip, new_name),
6403                                    errors.ECODE_NOTUNIQUE)
6404
6405     instance_list = self.cfg.GetInstanceList()
6406     if new_name in instance_list and new_name != instance.name:
6407       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6408                                  new_name, errors.ECODE_EXISTS)
6409
6410   def Exec(self, feedback_fn):
6411     """Rename the instance.
6412
6413     """
6414     inst = self.instance
6415     old_name = inst.name
6416
6417     rename_file_storage = False
6418     if (inst.disk_template in constants.DTS_FILEBASED and
6419         self.op.new_name != inst.name):
6420       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6421       rename_file_storage = True
6422
6423     self.cfg.RenameInstance(inst.name, self.op.new_name)
6424     # Change the instance lock. This is definitely safe while we hold the BGL.
6425     # Otherwise the new lock would have to be added in acquired mode.
6426     assert self.REQ_BGL
6427     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6428     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6429
6430     # re-read the instance from the configuration after rename
6431     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6432
6433     if rename_file_storage:
6434       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6435       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6436                                                      old_file_storage_dir,
6437                                                      new_file_storage_dir)
6438       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6439                    " (but the instance has been renamed in Ganeti)" %
6440                    (inst.primary_node, old_file_storage_dir,
6441                     new_file_storage_dir))
6442
6443     _StartInstanceDisks(self, inst, None)
6444     try:
6445       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6446                                                  old_name, self.op.debug_level)
6447       msg = result.fail_msg
6448       if msg:
6449         msg = ("Could not run OS rename script for instance %s on node %s"
6450                " (but the instance has been renamed in Ganeti): %s" %
6451                (inst.name, inst.primary_node, msg))
6452         self.proc.LogWarning(msg)
6453     finally:
6454       _ShutdownInstanceDisks(self, inst)
6455
6456     return inst.name
6457
6458
6459 class LUInstanceRemove(LogicalUnit):
6460   """Remove an instance.
6461
6462   """
6463   HPATH = "instance-remove"
6464   HTYPE = constants.HTYPE_INSTANCE
6465   REQ_BGL = False
6466
6467   def ExpandNames(self):
6468     self._ExpandAndLockInstance()
6469     self.needed_locks[locking.LEVEL_NODE] = []
6470     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6471
6472   def DeclareLocks(self, level):
6473     if level == locking.LEVEL_NODE:
6474       self._LockInstancesNodes()
6475
6476   def BuildHooksEnv(self):
6477     """Build hooks env.
6478
6479     This runs on master, primary and secondary nodes of the instance.
6480
6481     """
6482     env = _BuildInstanceHookEnvByObject(self, self.instance)
6483     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6484     return env
6485
6486   def BuildHooksNodes(self):
6487     """Build hooks nodes.
6488
6489     """
6490     nl = [self.cfg.GetMasterNode()]
6491     nl_post = list(self.instance.all_nodes) + nl
6492     return (nl, nl_post)
6493
6494   def CheckPrereq(self):
6495     """Check prerequisites.
6496
6497     This checks that the instance is in the cluster.
6498
6499     """
6500     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6501     assert self.instance is not None, \
6502       "Cannot retrieve locked instance %s" % self.op.instance_name
6503
6504   def Exec(self, feedback_fn):
6505     """Remove the instance.
6506
6507     """
6508     instance = self.instance
6509     logging.info("Shutting down instance %s on node %s",
6510                  instance.name, instance.primary_node)
6511
6512     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6513                                              self.op.shutdown_timeout)
6514     msg = result.fail_msg
6515     if msg:
6516       if self.op.ignore_failures:
6517         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6518       else:
6519         raise errors.OpExecError("Could not shutdown instance %s on"
6520                                  " node %s: %s" %
6521                                  (instance.name, instance.primary_node, msg))
6522
6523     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6524
6525
6526 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6527   """Utility function to remove an instance.
6528
6529   """
6530   logging.info("Removing block devices for instance %s", instance.name)
6531
6532   if not _RemoveDisks(lu, instance):
6533     if not ignore_failures:
6534       raise errors.OpExecError("Can't remove instance's disks")
6535     feedback_fn("Warning: can't remove instance's disks")
6536
6537   logging.info("Removing instance %s out of cluster config", instance.name)
6538
6539   lu.cfg.RemoveInstance(instance.name)
6540
6541   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6542     "Instance lock removal conflict"
6543
6544   # Remove lock for the instance
6545   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6546
6547
6548 class LUInstanceQuery(NoHooksLU):
6549   """Logical unit for querying instances.
6550
6551   """
6552   # pylint: disable-msg=W0142
6553   REQ_BGL = False
6554
6555   def CheckArguments(self):
6556     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6557                              self.op.output_fields, self.op.use_locking)
6558
6559   def ExpandNames(self):
6560     self.iq.ExpandNames(self)
6561
6562   def DeclareLocks(self, level):
6563     self.iq.DeclareLocks(self, level)
6564
6565   def Exec(self, feedback_fn):
6566     return self.iq.OldStyleQuery(self)
6567
6568
6569 class LUInstanceFailover(LogicalUnit):
6570   """Failover an instance.
6571
6572   """
6573   HPATH = "instance-failover"
6574   HTYPE = constants.HTYPE_INSTANCE
6575   REQ_BGL = False
6576
6577   def CheckArguments(self):
6578     """Check the arguments.
6579
6580     """
6581     self.iallocator = getattr(self.op, "iallocator", None)
6582     self.target_node = getattr(self.op, "target_node", None)
6583
6584   def ExpandNames(self):
6585     self._ExpandAndLockInstance()
6586
6587     if self.op.target_node is not None:
6588       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6589
6590     self.needed_locks[locking.LEVEL_NODE] = []
6591     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6592
6593     ignore_consistency = self.op.ignore_consistency
6594     shutdown_timeout = self.op.shutdown_timeout
6595     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6596                                        cleanup=False,
6597                                        failover=True,
6598                                        ignore_consistency=ignore_consistency,
6599                                        shutdown_timeout=shutdown_timeout)
6600     self.tasklets = [self._migrater]
6601
6602   def DeclareLocks(self, level):
6603     if level == locking.LEVEL_NODE:
6604       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6605       if instance.disk_template in constants.DTS_EXT_MIRROR:
6606         if self.op.target_node is None:
6607           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6608         else:
6609           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6610                                                    self.op.target_node]
6611         del self.recalculate_locks[locking.LEVEL_NODE]
6612       else:
6613         self._LockInstancesNodes()
6614
6615   def BuildHooksEnv(self):
6616     """Build hooks env.
6617
6618     This runs on master, primary and secondary nodes of the instance.
6619
6620     """
6621     instance = self._migrater.instance
6622     source_node = instance.primary_node
6623     target_node = self.op.target_node
6624     env = {
6625       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6626       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6627       "OLD_PRIMARY": source_node,
6628       "NEW_PRIMARY": target_node,
6629       }
6630
6631     if instance.disk_template in constants.DTS_INT_MIRROR:
6632       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6633       env["NEW_SECONDARY"] = source_node
6634     else:
6635       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6636
6637     env.update(_BuildInstanceHookEnvByObject(self, instance))
6638
6639     return env
6640
6641   def BuildHooksNodes(self):
6642     """Build hooks nodes.
6643
6644     """
6645     instance = self._migrater.instance
6646     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6647     return (nl, nl + [instance.primary_node])
6648
6649
6650 class LUInstanceMigrate(LogicalUnit):
6651   """Migrate an instance.
6652
6653   This is migration without shutting down, compared to the failover,
6654   which is done with shutdown.
6655
6656   """
6657   HPATH = "instance-migrate"
6658   HTYPE = constants.HTYPE_INSTANCE
6659   REQ_BGL = False
6660
6661   def ExpandNames(self):
6662     self._ExpandAndLockInstance()
6663
6664     if self.op.target_node is not None:
6665       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6666
6667     self.needed_locks[locking.LEVEL_NODE] = []
6668     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6669
6670     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6671                                        cleanup=self.op.cleanup,
6672                                        failover=False,
6673                                        fallback=self.op.allow_failover)
6674     self.tasklets = [self._migrater]
6675
6676   def DeclareLocks(self, level):
6677     if level == locking.LEVEL_NODE:
6678       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6679       if instance.disk_template in constants.DTS_EXT_MIRROR:
6680         if self.op.target_node is None:
6681           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6682         else:
6683           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6684                                                    self.op.target_node]
6685         del self.recalculate_locks[locking.LEVEL_NODE]
6686       else:
6687         self._LockInstancesNodes()
6688
6689   def BuildHooksEnv(self):
6690     """Build hooks env.
6691
6692     This runs on master, primary and secondary nodes of the instance.
6693
6694     """
6695     instance = self._migrater.instance
6696     source_node = instance.primary_node
6697     target_node = self.op.target_node
6698     env = _BuildInstanceHookEnvByObject(self, instance)
6699     env.update({
6700       "MIGRATE_LIVE": self._migrater.live,
6701       "MIGRATE_CLEANUP": self.op.cleanup,
6702       "OLD_PRIMARY": source_node,
6703       "NEW_PRIMARY": target_node,
6704       })
6705
6706     if instance.disk_template in constants.DTS_INT_MIRROR:
6707       env["OLD_SECONDARY"] = target_node
6708       env["NEW_SECONDARY"] = source_node
6709     else:
6710       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6711
6712     return env
6713
6714   def BuildHooksNodes(self):
6715     """Build hooks nodes.
6716
6717     """
6718     instance = self._migrater.instance
6719     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6720     return (nl, nl + [instance.primary_node])
6721
6722
6723 class LUInstanceMove(LogicalUnit):
6724   """Move an instance by data-copying.
6725
6726   """
6727   HPATH = "instance-move"
6728   HTYPE = constants.HTYPE_INSTANCE
6729   REQ_BGL = False
6730
6731   def ExpandNames(self):
6732     self._ExpandAndLockInstance()
6733     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6734     self.op.target_node = target_node
6735     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6736     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6737
6738   def DeclareLocks(self, level):
6739     if level == locking.LEVEL_NODE:
6740       self._LockInstancesNodes(primary_only=True)
6741
6742   def BuildHooksEnv(self):
6743     """Build hooks env.
6744
6745     This runs on master, primary and secondary nodes of the instance.
6746
6747     """
6748     env = {
6749       "TARGET_NODE": self.op.target_node,
6750       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6751       }
6752     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6753     return env
6754
6755   def BuildHooksNodes(self):
6756     """Build hooks nodes.
6757
6758     """
6759     nl = [
6760       self.cfg.GetMasterNode(),
6761       self.instance.primary_node,
6762       self.op.target_node,
6763       ]
6764     return (nl, nl)
6765
6766   def CheckPrereq(self):
6767     """Check prerequisites.
6768
6769     This checks that the instance is in the cluster.
6770
6771     """
6772     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6773     assert self.instance is not None, \
6774       "Cannot retrieve locked instance %s" % self.op.instance_name
6775
6776     node = self.cfg.GetNodeInfo(self.op.target_node)
6777     assert node is not None, \
6778       "Cannot retrieve locked node %s" % self.op.target_node
6779
6780     self.target_node = target_node = node.name
6781
6782     if target_node == instance.primary_node:
6783       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6784                                  (instance.name, target_node),
6785                                  errors.ECODE_STATE)
6786
6787     bep = self.cfg.GetClusterInfo().FillBE(instance)
6788
6789     for idx, dsk in enumerate(instance.disks):
6790       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6791         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6792                                    " cannot copy" % idx, errors.ECODE_STATE)
6793
6794     _CheckNodeOnline(self, target_node)
6795     _CheckNodeNotDrained(self, target_node)
6796     _CheckNodeVmCapable(self, target_node)
6797
6798     if instance.admin_up:
6799       # check memory requirements on the secondary node
6800       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6801                            instance.name, bep[constants.BE_MEMORY],
6802                            instance.hypervisor)
6803     else:
6804       self.LogInfo("Not checking memory on the secondary node as"
6805                    " instance will not be started")
6806
6807     # check bridge existance
6808     _CheckInstanceBridgesExist(self, instance, node=target_node)
6809
6810   def Exec(self, feedback_fn):
6811     """Move an instance.
6812
6813     The move is done by shutting it down on its present node, copying
6814     the data over (slow) and starting it on the new node.
6815
6816     """
6817     instance = self.instance
6818
6819     source_node = instance.primary_node
6820     target_node = self.target_node
6821
6822     self.LogInfo("Shutting down instance %s on source node %s",
6823                  instance.name, source_node)
6824
6825     result = self.rpc.call_instance_shutdown(source_node, instance,
6826                                              self.op.shutdown_timeout)
6827     msg = result.fail_msg
6828     if msg:
6829       if self.op.ignore_consistency:
6830         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6831                              " Proceeding anyway. Please make sure node"
6832                              " %s is down. Error details: %s",
6833                              instance.name, source_node, source_node, msg)
6834       else:
6835         raise errors.OpExecError("Could not shutdown instance %s on"
6836                                  " node %s: %s" %
6837                                  (instance.name, source_node, msg))
6838
6839     # create the target disks
6840     try:
6841       _CreateDisks(self, instance, target_node=target_node)
6842     except errors.OpExecError:
6843       self.LogWarning("Device creation failed, reverting...")
6844       try:
6845         _RemoveDisks(self, instance, target_node=target_node)
6846       finally:
6847         self.cfg.ReleaseDRBDMinors(instance.name)
6848         raise
6849
6850     cluster_name = self.cfg.GetClusterInfo().cluster_name
6851
6852     errs = []
6853     # activate, get path, copy the data over
6854     for idx, disk in enumerate(instance.disks):
6855       self.LogInfo("Copying data for disk %d", idx)
6856       result = self.rpc.call_blockdev_assemble(target_node, disk,
6857                                                instance.name, True, idx)
6858       if result.fail_msg:
6859         self.LogWarning("Can't assemble newly created disk %d: %s",
6860                         idx, result.fail_msg)
6861         errs.append(result.fail_msg)
6862         break
6863       dev_path = result.payload
6864       result = self.rpc.call_blockdev_export(source_node, disk,
6865                                              target_node, dev_path,
6866                                              cluster_name)
6867       if result.fail_msg:
6868         self.LogWarning("Can't copy data over for disk %d: %s",
6869                         idx, result.fail_msg)
6870         errs.append(result.fail_msg)
6871         break
6872
6873     if errs:
6874       self.LogWarning("Some disks failed to copy, aborting")
6875       try:
6876         _RemoveDisks(self, instance, target_node=target_node)
6877       finally:
6878         self.cfg.ReleaseDRBDMinors(instance.name)
6879         raise errors.OpExecError("Errors during disk copy: %s" %
6880                                  (",".join(errs),))
6881
6882     instance.primary_node = target_node
6883     self.cfg.Update(instance, feedback_fn)
6884
6885     self.LogInfo("Removing the disks on the original node")
6886     _RemoveDisks(self, instance, target_node=source_node)
6887
6888     # Only start the instance if it's marked as up
6889     if instance.admin_up:
6890       self.LogInfo("Starting instance %s on node %s",
6891                    instance.name, target_node)
6892
6893       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6894                                            ignore_secondaries=True)
6895       if not disks_ok:
6896         _ShutdownInstanceDisks(self, instance)
6897         raise errors.OpExecError("Can't activate the instance's disks")
6898
6899       result = self.rpc.call_instance_start(target_node, instance,
6900                                             None, None, False)
6901       msg = result.fail_msg
6902       if msg:
6903         _ShutdownInstanceDisks(self, instance)
6904         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6905                                  (instance.name, target_node, msg))
6906
6907
6908 class LUNodeMigrate(LogicalUnit):
6909   """Migrate all instances from a node.
6910
6911   """
6912   HPATH = "node-migrate"
6913   HTYPE = constants.HTYPE_NODE
6914   REQ_BGL = False
6915
6916   def CheckArguments(self):
6917     pass
6918
6919   def ExpandNames(self):
6920     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6921
6922     self.share_locks = _ShareAll()
6923     self.needed_locks = {
6924       locking.LEVEL_NODE: [self.op.node_name],
6925       }
6926
6927   def BuildHooksEnv(self):
6928     """Build hooks env.
6929
6930     This runs on the master, the primary and all the secondaries.
6931
6932     """
6933     return {
6934       "NODE_NAME": self.op.node_name,
6935       }
6936
6937   def BuildHooksNodes(self):
6938     """Build hooks nodes.
6939
6940     """
6941     nl = [self.cfg.GetMasterNode()]
6942     return (nl, nl)
6943
6944   def CheckPrereq(self):
6945     pass
6946
6947   def Exec(self, feedback_fn):
6948     # Prepare jobs for migration instances
6949     jobs = [
6950       [opcodes.OpInstanceMigrate(instance_name=inst.name,
6951                                  mode=self.op.mode,
6952                                  live=self.op.live,
6953                                  iallocator=self.op.iallocator,
6954                                  target_node=self.op.target_node)]
6955       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6956       ]
6957
6958     # TODO: Run iallocator in this opcode and pass correct placement options to
6959     # OpInstanceMigrate. Since other jobs can modify the cluster between
6960     # running the iallocator and the actual migration, a good consistency model
6961     # will have to be found.
6962
6963     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6964             frozenset([self.op.node_name]))
6965
6966     return ResultWithJobs(jobs)
6967
6968
6969 class TLMigrateInstance(Tasklet):
6970   """Tasklet class for instance migration.
6971
6972   @type live: boolean
6973   @ivar live: whether the migration will be done live or non-live;
6974       this variable is initalized only after CheckPrereq has run
6975   @type cleanup: boolean
6976   @ivar cleanup: Wheater we cleanup from a failed migration
6977   @type iallocator: string
6978   @ivar iallocator: The iallocator used to determine target_node
6979   @type target_node: string
6980   @ivar target_node: If given, the target_node to reallocate the instance to
6981   @type failover: boolean
6982   @ivar failover: Whether operation results in failover or migration
6983   @type fallback: boolean
6984   @ivar fallback: Whether fallback to failover is allowed if migration not
6985                   possible
6986   @type ignore_consistency: boolean
6987   @ivar ignore_consistency: Wheter we should ignore consistency between source
6988                             and target node
6989   @type shutdown_timeout: int
6990   @ivar shutdown_timeout: In case of failover timeout of the shutdown
6991
6992   """
6993   def __init__(self, lu, instance_name, cleanup=False,
6994                failover=False, fallback=False,
6995                ignore_consistency=False,
6996                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6997     """Initializes this class.
6998
6999     """
7000     Tasklet.__init__(self, lu)
7001
7002     # Parameters
7003     self.instance_name = instance_name
7004     self.cleanup = cleanup
7005     self.live = False # will be overridden later
7006     self.failover = failover
7007     self.fallback = fallback
7008     self.ignore_consistency = ignore_consistency
7009     self.shutdown_timeout = shutdown_timeout
7010
7011   def CheckPrereq(self):
7012     """Check prerequisites.
7013
7014     This checks that the instance is in the cluster.
7015
7016     """
7017     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7018     instance = self.cfg.GetInstanceInfo(instance_name)
7019     assert instance is not None
7020     self.instance = instance
7021
7022     if (not self.cleanup and not instance.admin_up and not self.failover and
7023         self.fallback):
7024       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7025                       " to failover")
7026       self.failover = True
7027
7028     if instance.disk_template not in constants.DTS_MIRRORED:
7029       if self.failover:
7030         text = "failovers"
7031       else:
7032         text = "migrations"
7033       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7034                                  " %s" % (instance.disk_template, text),
7035                                  errors.ECODE_STATE)
7036
7037     if instance.disk_template in constants.DTS_EXT_MIRROR:
7038       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7039
7040       if self.lu.op.iallocator:
7041         self._RunAllocator()
7042       else:
7043         # We set set self.target_node as it is required by
7044         # BuildHooksEnv
7045         self.target_node = self.lu.op.target_node
7046
7047       # self.target_node is already populated, either directly or by the
7048       # iallocator run
7049       target_node = self.target_node
7050       if self.target_node == instance.primary_node:
7051         raise errors.OpPrereqError("Cannot migrate instance %s"
7052                                    " to its primary (%s)" %
7053                                    (instance.name, instance.primary_node))
7054
7055       if len(self.lu.tasklets) == 1:
7056         # It is safe to release locks only when we're the only tasklet
7057         # in the LU
7058         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7059                       keep=[instance.primary_node, self.target_node])
7060
7061     else:
7062       secondary_nodes = instance.secondary_nodes
7063       if not secondary_nodes:
7064         raise errors.ConfigurationError("No secondary node but using"
7065                                         " %s disk template" %
7066                                         instance.disk_template)
7067       target_node = secondary_nodes[0]
7068       if self.lu.op.iallocator or (self.lu.op.target_node and
7069                                    self.lu.op.target_node != target_node):
7070         if self.failover:
7071           text = "failed over"
7072         else:
7073           text = "migrated"
7074         raise errors.OpPrereqError("Instances with disk template %s cannot"
7075                                    " be %s to arbitrary nodes"
7076                                    " (neither an iallocator nor a target"
7077                                    " node can be passed)" %
7078                                    (instance.disk_template, text),
7079                                    errors.ECODE_INVAL)
7080
7081     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7082
7083     # check memory requirements on the secondary node
7084     if not self.failover or instance.admin_up:
7085       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7086                            instance.name, i_be[constants.BE_MEMORY],
7087                            instance.hypervisor)
7088     else:
7089       self.lu.LogInfo("Not checking memory on the secondary node as"
7090                       " instance will not be started")
7091
7092     # check bridge existance
7093     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7094
7095     if not self.cleanup:
7096       _CheckNodeNotDrained(self.lu, target_node)
7097       if not self.failover:
7098         result = self.rpc.call_instance_migratable(instance.primary_node,
7099                                                    instance)
7100         if result.fail_msg and self.fallback:
7101           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7102                           " failover")
7103           self.failover = True
7104         else:
7105           result.Raise("Can't migrate, please use failover",
7106                        prereq=True, ecode=errors.ECODE_STATE)
7107
7108     assert not (self.failover and self.cleanup)
7109
7110     if not self.failover:
7111       if self.lu.op.live is not None and self.lu.op.mode is not None:
7112         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7113                                    " parameters are accepted",
7114                                    errors.ECODE_INVAL)
7115       if self.lu.op.live is not None:
7116         if self.lu.op.live:
7117           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7118         else:
7119           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7120         # reset the 'live' parameter to None so that repeated
7121         # invocations of CheckPrereq do not raise an exception
7122         self.lu.op.live = None
7123       elif self.lu.op.mode is None:
7124         # read the default value from the hypervisor
7125         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7126                                                 skip_globals=False)
7127         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7128
7129       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7130     else:
7131       # Failover is never live
7132       self.live = False
7133
7134   def _RunAllocator(self):
7135     """Run the allocator based on input opcode.
7136
7137     """
7138     ial = IAllocator(self.cfg, self.rpc,
7139                      mode=constants.IALLOCATOR_MODE_RELOC,
7140                      name=self.instance_name,
7141                      # TODO See why hail breaks with a single node below
7142                      relocate_from=[self.instance.primary_node,
7143                                     self.instance.primary_node],
7144                      )
7145
7146     ial.Run(self.lu.op.iallocator)
7147
7148     if not ial.success:
7149       raise errors.OpPrereqError("Can't compute nodes using"
7150                                  " iallocator '%s': %s" %
7151                                  (self.lu.op.iallocator, ial.info),
7152                                  errors.ECODE_NORES)
7153     if len(ial.result) != ial.required_nodes:
7154       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7155                                  " of nodes (%s), required %s" %
7156                                  (self.lu.op.iallocator, len(ial.result),
7157                                   ial.required_nodes), errors.ECODE_FAULT)
7158     self.target_node = ial.result[0]
7159     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7160                  self.instance_name, self.lu.op.iallocator,
7161                  utils.CommaJoin(ial.result))
7162
7163   def _WaitUntilSync(self):
7164     """Poll with custom rpc for disk sync.
7165
7166     This uses our own step-based rpc call.
7167
7168     """
7169     self.feedback_fn("* wait until resync is done")
7170     all_done = False
7171     while not all_done:
7172       all_done = True
7173       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7174                                             self.nodes_ip,
7175                                             self.instance.disks)
7176       min_percent = 100
7177       for node, nres in result.items():
7178         nres.Raise("Cannot resync disks on node %s" % node)
7179         node_done, node_percent = nres.payload
7180         all_done = all_done and node_done
7181         if node_percent is not None:
7182           min_percent = min(min_percent, node_percent)
7183       if not all_done:
7184         if min_percent < 100:
7185           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7186         time.sleep(2)
7187
7188   def _EnsureSecondary(self, node):
7189     """Demote a node to secondary.
7190
7191     """
7192     self.feedback_fn("* switching node %s to secondary mode" % node)
7193
7194     for dev in self.instance.disks:
7195       self.cfg.SetDiskID(dev, node)
7196
7197     result = self.rpc.call_blockdev_close(node, self.instance.name,
7198                                           self.instance.disks)
7199     result.Raise("Cannot change disk to secondary on node %s" % node)
7200
7201   def _GoStandalone(self):
7202     """Disconnect from the network.
7203
7204     """
7205     self.feedback_fn("* changing into standalone mode")
7206     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7207                                                self.instance.disks)
7208     for node, nres in result.items():
7209       nres.Raise("Cannot disconnect disks node %s" % node)
7210
7211   def _GoReconnect(self, multimaster):
7212     """Reconnect to the network.
7213
7214     """
7215     if multimaster:
7216       msg = "dual-master"
7217     else:
7218       msg = "single-master"
7219     self.feedback_fn("* changing disks into %s mode" % msg)
7220     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7221                                            self.instance.disks,
7222                                            self.instance.name, multimaster)
7223     for node, nres in result.items():
7224       nres.Raise("Cannot change disks config on node %s" % node)
7225
7226   def _ExecCleanup(self):
7227     """Try to cleanup after a failed migration.
7228
7229     The cleanup is done by:
7230       - check that the instance is running only on one node
7231         (and update the config if needed)
7232       - change disks on its secondary node to secondary
7233       - wait until disks are fully synchronized
7234       - disconnect from the network
7235       - change disks into single-master mode
7236       - wait again until disks are fully synchronized
7237
7238     """
7239     instance = self.instance
7240     target_node = self.target_node
7241     source_node = self.source_node
7242
7243     # check running on only one node
7244     self.feedback_fn("* checking where the instance actually runs"
7245                      " (if this hangs, the hypervisor might be in"
7246                      " a bad state)")
7247     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7248     for node, result in ins_l.items():
7249       result.Raise("Can't contact node %s" % node)
7250
7251     runningon_source = instance.name in ins_l[source_node].payload
7252     runningon_target = instance.name in ins_l[target_node].payload
7253
7254     if runningon_source and runningon_target:
7255       raise errors.OpExecError("Instance seems to be running on two nodes,"
7256                                " or the hypervisor is confused; you will have"
7257                                " to ensure manually that it runs only on one"
7258                                " and restart this operation")
7259
7260     if not (runningon_source or runningon_target):
7261       raise errors.OpExecError("Instance does not seem to be running at all;"
7262                                " in this case it's safer to repair by"
7263                                " running 'gnt-instance stop' to ensure disk"
7264                                " shutdown, and then restarting it")
7265
7266     if runningon_target:
7267       # the migration has actually succeeded, we need to update the config
7268       self.feedback_fn("* instance running on secondary node (%s),"
7269                        " updating config" % target_node)
7270       instance.primary_node = target_node
7271       self.cfg.Update(instance, self.feedback_fn)
7272       demoted_node = source_node
7273     else:
7274       self.feedback_fn("* instance confirmed to be running on its"
7275                        " primary node (%s)" % source_node)
7276       demoted_node = target_node
7277
7278     if instance.disk_template in constants.DTS_INT_MIRROR:
7279       self._EnsureSecondary(demoted_node)
7280       try:
7281         self._WaitUntilSync()
7282       except errors.OpExecError:
7283         # we ignore here errors, since if the device is standalone, it
7284         # won't be able to sync
7285         pass
7286       self._GoStandalone()
7287       self._GoReconnect(False)
7288       self._WaitUntilSync()
7289
7290     self.feedback_fn("* done")
7291
7292   def _RevertDiskStatus(self):
7293     """Try to revert the disk status after a failed migration.
7294
7295     """
7296     target_node = self.target_node
7297     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7298       return
7299
7300     try:
7301       self._EnsureSecondary(target_node)
7302       self._GoStandalone()
7303       self._GoReconnect(False)
7304       self._WaitUntilSync()
7305     except errors.OpExecError, err:
7306       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7307                          " please try to recover the instance manually;"
7308                          " error '%s'" % str(err))
7309
7310   def _AbortMigration(self):
7311     """Call the hypervisor code to abort a started migration.
7312
7313     """
7314     instance = self.instance
7315     target_node = self.target_node
7316     migration_info = self.migration_info
7317
7318     abort_result = self.rpc.call_finalize_migration(target_node,
7319                                                     instance,
7320                                                     migration_info,
7321                                                     False)
7322     abort_msg = abort_result.fail_msg
7323     if abort_msg:
7324       logging.error("Aborting migration failed on target node %s: %s",
7325                     target_node, abort_msg)
7326       # Don't raise an exception here, as we stil have to try to revert the
7327       # disk status, even if this step failed.
7328
7329   def _ExecMigration(self):
7330     """Migrate an instance.
7331
7332     The migrate is done by:
7333       - change the disks into dual-master mode
7334       - wait until disks are fully synchronized again
7335       - migrate the instance
7336       - change disks on the new secondary node (the old primary) to secondary
7337       - wait until disks are fully synchronized
7338       - change disks into single-master mode
7339
7340     """
7341     instance = self.instance
7342     target_node = self.target_node
7343     source_node = self.source_node
7344
7345     self.feedback_fn("* checking disk consistency between source and target")
7346     for dev in instance.disks:
7347       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7348         raise errors.OpExecError("Disk %s is degraded or not fully"
7349                                  " synchronized on target node,"
7350                                  " aborting migration" % dev.iv_name)
7351
7352     # First get the migration information from the remote node
7353     result = self.rpc.call_migration_info(source_node, instance)
7354     msg = result.fail_msg
7355     if msg:
7356       log_err = ("Failed fetching source migration information from %s: %s" %
7357                  (source_node, msg))
7358       logging.error(log_err)
7359       raise errors.OpExecError(log_err)
7360
7361     self.migration_info = migration_info = result.payload
7362
7363     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7364       # Then switch the disks to master/master mode
7365       self._EnsureSecondary(target_node)
7366       self._GoStandalone()
7367       self._GoReconnect(True)
7368       self._WaitUntilSync()
7369
7370     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7371     result = self.rpc.call_accept_instance(target_node,
7372                                            instance,
7373                                            migration_info,
7374                                            self.nodes_ip[target_node])
7375
7376     msg = result.fail_msg
7377     if msg:
7378       logging.error("Instance pre-migration failed, trying to revert"
7379                     " disk status: %s", msg)
7380       self.feedback_fn("Pre-migration failed, aborting")
7381       self._AbortMigration()
7382       self._RevertDiskStatus()
7383       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7384                                (instance.name, msg))
7385
7386     self.feedback_fn("* migrating instance to %s" % target_node)
7387     result = self.rpc.call_instance_migrate(source_node, instance,
7388                                             self.nodes_ip[target_node],
7389                                             self.live)
7390     msg = result.fail_msg
7391     if msg:
7392       logging.error("Instance migration failed, trying to revert"
7393                     " disk status: %s", msg)
7394       self.feedback_fn("Migration failed, aborting")
7395       self._AbortMigration()
7396       self._RevertDiskStatus()
7397       raise errors.OpExecError("Could not migrate instance %s: %s" %
7398                                (instance.name, msg))
7399
7400     instance.primary_node = target_node
7401     # distribute new instance config to the other nodes
7402     self.cfg.Update(instance, self.feedback_fn)
7403
7404     result = self.rpc.call_finalize_migration(target_node,
7405                                               instance,
7406                                               migration_info,
7407                                               True)
7408     msg = result.fail_msg
7409     if msg:
7410       logging.error("Instance migration succeeded, but finalization failed:"
7411                     " %s", msg)
7412       raise errors.OpExecError("Could not finalize instance migration: %s" %
7413                                msg)
7414
7415     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7416       self._EnsureSecondary(source_node)
7417       self._WaitUntilSync()
7418       self._GoStandalone()
7419       self._GoReconnect(False)
7420       self._WaitUntilSync()
7421
7422     self.feedback_fn("* done")
7423
7424   def _ExecFailover(self):
7425     """Failover an instance.
7426
7427     The failover is done by shutting it down on its present node and
7428     starting it on the secondary.
7429
7430     """
7431     instance = self.instance
7432     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7433
7434     source_node = instance.primary_node
7435     target_node = self.target_node
7436
7437     if instance.admin_up:
7438       self.feedback_fn("* checking disk consistency between source and target")
7439       for dev in instance.disks:
7440         # for drbd, these are drbd over lvm
7441         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7442           if primary_node.offline:
7443             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7444                              " target node %s" %
7445                              (primary_node.name, dev.iv_name, target_node))
7446           elif not self.ignore_consistency:
7447             raise errors.OpExecError("Disk %s is degraded on target node,"
7448                                      " aborting failover" % dev.iv_name)
7449     else:
7450       self.feedback_fn("* not checking disk consistency as instance is not"
7451                        " running")
7452
7453     self.feedback_fn("* shutting down instance on source node")
7454     logging.info("Shutting down instance %s on node %s",
7455                  instance.name, source_node)
7456
7457     result = self.rpc.call_instance_shutdown(source_node, instance,
7458                                              self.shutdown_timeout)
7459     msg = result.fail_msg
7460     if msg:
7461       if self.ignore_consistency or primary_node.offline:
7462         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7463                            " proceeding anyway; please make sure node"
7464                            " %s is down; error details: %s",
7465                            instance.name, source_node, source_node, msg)
7466       else:
7467         raise errors.OpExecError("Could not shutdown instance %s on"
7468                                  " node %s: %s" %
7469                                  (instance.name, source_node, msg))
7470
7471     self.feedback_fn("* deactivating the instance's disks on source node")
7472     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7473       raise errors.OpExecError("Can't shut down the instance's disks")
7474
7475     instance.primary_node = target_node
7476     # distribute new instance config to the other nodes
7477     self.cfg.Update(instance, self.feedback_fn)
7478
7479     # Only start the instance if it's marked as up
7480     if instance.admin_up:
7481       self.feedback_fn("* activating the instance's disks on target node %s" %
7482                        target_node)
7483       logging.info("Starting instance %s on node %s",
7484                    instance.name, target_node)
7485
7486       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7487                                            ignore_secondaries=True)
7488       if not disks_ok:
7489         _ShutdownInstanceDisks(self.lu, instance)
7490         raise errors.OpExecError("Can't activate the instance's disks")
7491
7492       self.feedback_fn("* starting the instance on the target node %s" %
7493                        target_node)
7494       result = self.rpc.call_instance_start(target_node, instance, None, None,
7495                                             False)
7496       msg = result.fail_msg
7497       if msg:
7498         _ShutdownInstanceDisks(self.lu, instance)
7499         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7500                                  (instance.name, target_node, msg))
7501
7502   def Exec(self, feedback_fn):
7503     """Perform the migration.
7504
7505     """
7506     self.feedback_fn = feedback_fn
7507     self.source_node = self.instance.primary_node
7508
7509     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7510     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7511       self.target_node = self.instance.secondary_nodes[0]
7512       # Otherwise self.target_node has been populated either
7513       # directly, or through an iallocator.
7514
7515     self.all_nodes = [self.source_node, self.target_node]
7516     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7517                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7518
7519     if self.failover:
7520       feedback_fn("Failover instance %s" % self.instance.name)
7521       self._ExecFailover()
7522     else:
7523       feedback_fn("Migrating instance %s" % self.instance.name)
7524
7525       if self.cleanup:
7526         return self._ExecCleanup()
7527       else:
7528         return self._ExecMigration()
7529
7530
7531 def _CreateBlockDev(lu, node, instance, device, force_create,
7532                     info, force_open):
7533   """Create a tree of block devices on a given node.
7534
7535   If this device type has to be created on secondaries, create it and
7536   all its children.
7537
7538   If not, just recurse to children keeping the same 'force' value.
7539
7540   @param lu: the lu on whose behalf we execute
7541   @param node: the node on which to create the device
7542   @type instance: L{objects.Instance}
7543   @param instance: the instance which owns the device
7544   @type device: L{objects.Disk}
7545   @param device: the device to create
7546   @type force_create: boolean
7547   @param force_create: whether to force creation of this device; this
7548       will be change to True whenever we find a device which has
7549       CreateOnSecondary() attribute
7550   @param info: the extra 'metadata' we should attach to the device
7551       (this will be represented as a LVM tag)
7552   @type force_open: boolean
7553   @param force_open: this parameter will be passes to the
7554       L{backend.BlockdevCreate} function where it specifies
7555       whether we run on primary or not, and it affects both
7556       the child assembly and the device own Open() execution
7557
7558   """
7559   if device.CreateOnSecondary():
7560     force_create = True
7561
7562   if device.children:
7563     for child in device.children:
7564       _CreateBlockDev(lu, node, instance, child, force_create,
7565                       info, force_open)
7566
7567   if not force_create:
7568     return
7569
7570   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7571
7572
7573 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7574   """Create a single block device on a given node.
7575
7576   This will not recurse over children of the device, so they must be
7577   created in advance.
7578
7579   @param lu: the lu on whose behalf we execute
7580   @param node: the node on which to create the device
7581   @type instance: L{objects.Instance}
7582   @param instance: the instance which owns the device
7583   @type device: L{objects.Disk}
7584   @param device: the device to create
7585   @param info: the extra 'metadata' we should attach to the device
7586       (this will be represented as a LVM tag)
7587   @type force_open: boolean
7588   @param force_open: this parameter will be passes to the
7589       L{backend.BlockdevCreate} function where it specifies
7590       whether we run on primary or not, and it affects both
7591       the child assembly and the device own Open() execution
7592
7593   """
7594   lu.cfg.SetDiskID(device, node)
7595   result = lu.rpc.call_blockdev_create(node, device, device.size,
7596                                        instance.name, force_open, info)
7597   result.Raise("Can't create block device %s on"
7598                " node %s for instance %s" % (device, node, instance.name))
7599   if device.physical_id is None:
7600     device.physical_id = result.payload
7601
7602
7603 def _GenerateUniqueNames(lu, exts):
7604   """Generate a suitable LV name.
7605
7606   This will generate a logical volume name for the given instance.
7607
7608   """
7609   results = []
7610   for val in exts:
7611     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7612     results.append("%s%s" % (new_id, val))
7613   return results
7614
7615
7616 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7617                          iv_name, p_minor, s_minor):
7618   """Generate a drbd8 device complete with its children.
7619
7620   """
7621   assert len(vgnames) == len(names) == 2
7622   port = lu.cfg.AllocatePort()
7623   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7624   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7625                           logical_id=(vgnames[0], names[0]))
7626   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7627                           logical_id=(vgnames[1], names[1]))
7628   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7629                           logical_id=(primary, secondary, port,
7630                                       p_minor, s_minor,
7631                                       shared_secret),
7632                           children=[dev_data, dev_meta],
7633                           iv_name=iv_name)
7634   return drbd_dev
7635
7636
7637 def _GenerateDiskTemplate(lu, template_name,
7638                           instance_name, primary_node,
7639                           secondary_nodes, disk_info,
7640                           file_storage_dir, file_driver,
7641                           base_index, feedback_fn):
7642   """Generate the entire disk layout for a given template type.
7643
7644   """
7645   #TODO: compute space requirements
7646
7647   vgname = lu.cfg.GetVGName()
7648   disk_count = len(disk_info)
7649   disks = []
7650   if template_name == constants.DT_DISKLESS:
7651     pass
7652   elif template_name == constants.DT_PLAIN:
7653     if len(secondary_nodes) != 0:
7654       raise errors.ProgrammerError("Wrong template configuration")
7655
7656     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7657                                       for i in range(disk_count)])
7658     for idx, disk in enumerate(disk_info):
7659       disk_index = idx + base_index
7660       vg = disk.get(constants.IDISK_VG, vgname)
7661       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7662       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7663                               size=disk[constants.IDISK_SIZE],
7664                               logical_id=(vg, names[idx]),
7665                               iv_name="disk/%d" % disk_index,
7666                               mode=disk[constants.IDISK_MODE])
7667       disks.append(disk_dev)
7668   elif template_name == constants.DT_DRBD8:
7669     if len(secondary_nodes) != 1:
7670       raise errors.ProgrammerError("Wrong template configuration")
7671     remote_node = secondary_nodes[0]
7672     minors = lu.cfg.AllocateDRBDMinor(
7673       [primary_node, remote_node] * len(disk_info), instance_name)
7674
7675     names = []
7676     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7677                                                for i in range(disk_count)]):
7678       names.append(lv_prefix + "_data")
7679       names.append(lv_prefix + "_meta")
7680     for idx, disk in enumerate(disk_info):
7681       disk_index = idx + base_index
7682       data_vg = disk.get(constants.IDISK_VG, vgname)
7683       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7684       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7685                                       disk[constants.IDISK_SIZE],
7686                                       [data_vg, meta_vg],
7687                                       names[idx * 2:idx * 2 + 2],
7688                                       "disk/%d" % disk_index,
7689                                       minors[idx * 2], minors[idx * 2 + 1])
7690       disk_dev.mode = disk[constants.IDISK_MODE]
7691       disks.append(disk_dev)
7692   elif template_name == constants.DT_FILE:
7693     if len(secondary_nodes) != 0:
7694       raise errors.ProgrammerError("Wrong template configuration")
7695
7696     opcodes.RequireFileStorage()
7697
7698     for idx, disk in enumerate(disk_info):
7699       disk_index = idx + base_index
7700       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7701                               size=disk[constants.IDISK_SIZE],
7702                               iv_name="disk/%d" % disk_index,
7703                               logical_id=(file_driver,
7704                                           "%s/disk%d" % (file_storage_dir,
7705                                                          disk_index)),
7706                               mode=disk[constants.IDISK_MODE])
7707       disks.append(disk_dev)
7708   elif template_name == constants.DT_SHARED_FILE:
7709     if len(secondary_nodes) != 0:
7710       raise errors.ProgrammerError("Wrong template configuration")
7711
7712     opcodes.RequireSharedFileStorage()
7713
7714     for idx, disk in enumerate(disk_info):
7715       disk_index = idx + base_index
7716       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7717                               size=disk[constants.IDISK_SIZE],
7718                               iv_name="disk/%d" % disk_index,
7719                               logical_id=(file_driver,
7720                                           "%s/disk%d" % (file_storage_dir,
7721                                                          disk_index)),
7722                               mode=disk[constants.IDISK_MODE])
7723       disks.append(disk_dev)
7724   elif template_name == constants.DT_BLOCK:
7725     if len(secondary_nodes) != 0:
7726       raise errors.ProgrammerError("Wrong template configuration")
7727
7728     for idx, disk in enumerate(disk_info):
7729       disk_index = idx + base_index
7730       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7731                               size=disk[constants.IDISK_SIZE],
7732                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7733                                           disk[constants.IDISK_ADOPT]),
7734                               iv_name="disk/%d" % disk_index,
7735                               mode=disk[constants.IDISK_MODE])
7736       disks.append(disk_dev)
7737
7738   else:
7739     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7740   return disks
7741
7742
7743 def _GetInstanceInfoText(instance):
7744   """Compute that text that should be added to the disk's metadata.
7745
7746   """
7747   return "originstname+%s" % instance.name
7748
7749
7750 def _CalcEta(time_taken, written, total_size):
7751   """Calculates the ETA based on size written and total size.
7752
7753   @param time_taken: The time taken so far
7754   @param written: amount written so far
7755   @param total_size: The total size of data to be written
7756   @return: The remaining time in seconds
7757
7758   """
7759   avg_time = time_taken / float(written)
7760   return (total_size - written) * avg_time
7761
7762
7763 def _WipeDisks(lu, instance):
7764   """Wipes instance disks.
7765
7766   @type lu: L{LogicalUnit}
7767   @param lu: the logical unit on whose behalf we execute
7768   @type instance: L{objects.Instance}
7769   @param instance: the instance whose disks we should create
7770   @return: the success of the wipe
7771
7772   """
7773   node = instance.primary_node
7774
7775   for device in instance.disks:
7776     lu.cfg.SetDiskID(device, node)
7777
7778   logging.info("Pause sync of instance %s disks", instance.name)
7779   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7780
7781   for idx, success in enumerate(result.payload):
7782     if not success:
7783       logging.warn("pause-sync of instance %s for disks %d failed",
7784                    instance.name, idx)
7785
7786   try:
7787     for idx, device in enumerate(instance.disks):
7788       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7789       # MAX_WIPE_CHUNK at max
7790       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7791                             constants.MIN_WIPE_CHUNK_PERCENT)
7792       # we _must_ make this an int, otherwise rounding errors will
7793       # occur
7794       wipe_chunk_size = int(wipe_chunk_size)
7795
7796       lu.LogInfo("* Wiping disk %d", idx)
7797       logging.info("Wiping disk %d for instance %s, node %s using"
7798                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7799
7800       offset = 0
7801       size = device.size
7802       last_output = 0
7803       start_time = time.time()
7804
7805       while offset < size:
7806         wipe_size = min(wipe_chunk_size, size - offset)
7807         logging.debug("Wiping disk %d, offset %s, chunk %s",
7808                       idx, offset, wipe_size)
7809         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7810         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7811                      (idx, offset, wipe_size))
7812         now = time.time()
7813         offset += wipe_size
7814         if now - last_output >= 60:
7815           eta = _CalcEta(now - start_time, offset, size)
7816           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7817                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7818           last_output = now
7819   finally:
7820     logging.info("Resume sync of instance %s disks", instance.name)
7821
7822     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7823
7824     for idx, success in enumerate(result.payload):
7825       if not success:
7826         lu.LogWarning("Resume sync of disk %d failed, please have a"
7827                       " look at the status and troubleshoot the issue", idx)
7828         logging.warn("resume-sync of instance %s for disks %d failed",
7829                      instance.name, idx)
7830
7831
7832 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7833   """Create all disks for an instance.
7834
7835   This abstracts away some work from AddInstance.
7836
7837   @type lu: L{LogicalUnit}
7838   @param lu: the logical unit on whose behalf we execute
7839   @type instance: L{objects.Instance}
7840   @param instance: the instance whose disks we should create
7841   @type to_skip: list
7842   @param to_skip: list of indices to skip
7843   @type target_node: string
7844   @param target_node: if passed, overrides the target node for creation
7845   @rtype: boolean
7846   @return: the success of the creation
7847
7848   """
7849   info = _GetInstanceInfoText(instance)
7850   if target_node is None:
7851     pnode = instance.primary_node
7852     all_nodes = instance.all_nodes
7853   else:
7854     pnode = target_node
7855     all_nodes = [pnode]
7856
7857   if instance.disk_template in constants.DTS_FILEBASED:
7858     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7859     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7860
7861     result.Raise("Failed to create directory '%s' on"
7862                  " node %s" % (file_storage_dir, pnode))
7863
7864   # Note: this needs to be kept in sync with adding of disks in
7865   # LUInstanceSetParams
7866   for idx, device in enumerate(instance.disks):
7867     if to_skip and idx in to_skip:
7868       continue
7869     logging.info("Creating volume %s for instance %s",
7870                  device.iv_name, instance.name)
7871     #HARDCODE
7872     for node in all_nodes:
7873       f_create = node == pnode
7874       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7875
7876
7877 def _RemoveDisks(lu, instance, target_node=None):
7878   """Remove all disks for an instance.
7879
7880   This abstracts away some work from `AddInstance()` and
7881   `RemoveInstance()`. Note that in case some of the devices couldn't
7882   be removed, the removal will continue with the other ones (compare
7883   with `_CreateDisks()`).
7884
7885   @type lu: L{LogicalUnit}
7886   @param lu: the logical unit on whose behalf we execute
7887   @type instance: L{objects.Instance}
7888   @param instance: the instance whose disks we should remove
7889   @type target_node: string
7890   @param target_node: used to override the node on which to remove the disks
7891   @rtype: boolean
7892   @return: the success of the removal
7893
7894   """
7895   logging.info("Removing block devices for instance %s", instance.name)
7896
7897   all_result = True
7898   for device in instance.disks:
7899     if target_node:
7900       edata = [(target_node, device)]
7901     else:
7902       edata = device.ComputeNodeTree(instance.primary_node)
7903     for node, disk in edata:
7904       lu.cfg.SetDiskID(disk, node)
7905       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7906       if msg:
7907         lu.LogWarning("Could not remove block device %s on node %s,"
7908                       " continuing anyway: %s", device.iv_name, node, msg)
7909         all_result = False
7910
7911   if instance.disk_template == constants.DT_FILE:
7912     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7913     if target_node:
7914       tgt = target_node
7915     else:
7916       tgt = instance.primary_node
7917     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7918     if result.fail_msg:
7919       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7920                     file_storage_dir, instance.primary_node, result.fail_msg)
7921       all_result = False
7922
7923   return all_result
7924
7925
7926 def _ComputeDiskSizePerVG(disk_template, disks):
7927   """Compute disk size requirements in the volume group
7928
7929   """
7930   def _compute(disks, payload):
7931     """Universal algorithm.
7932
7933     """
7934     vgs = {}
7935     for disk in disks:
7936       vgs[disk[constants.IDISK_VG]] = \
7937         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7938
7939     return vgs
7940
7941   # Required free disk space as a function of disk and swap space
7942   req_size_dict = {
7943     constants.DT_DISKLESS: {},
7944     constants.DT_PLAIN: _compute(disks, 0),
7945     # 128 MB are added for drbd metadata for each disk
7946     constants.DT_DRBD8: _compute(disks, 128),
7947     constants.DT_FILE: {},
7948     constants.DT_SHARED_FILE: {},
7949   }
7950
7951   if disk_template not in req_size_dict:
7952     raise errors.ProgrammerError("Disk template '%s' size requirement"
7953                                  " is unknown" %  disk_template)
7954
7955   return req_size_dict[disk_template]
7956
7957
7958 def _ComputeDiskSize(disk_template, disks):
7959   """Compute disk size requirements in the volume group
7960
7961   """
7962   # Required free disk space as a function of disk and swap space
7963   req_size_dict = {
7964     constants.DT_DISKLESS: None,
7965     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7966     # 128 MB are added for drbd metadata for each disk
7967     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7968     constants.DT_FILE: None,
7969     constants.DT_SHARED_FILE: 0,
7970     constants.DT_BLOCK: 0,
7971   }
7972
7973   if disk_template not in req_size_dict:
7974     raise errors.ProgrammerError("Disk template '%s' size requirement"
7975                                  " is unknown" %  disk_template)
7976
7977   return req_size_dict[disk_template]
7978
7979
7980 def _FilterVmNodes(lu, nodenames):
7981   """Filters out non-vm_capable nodes from a list.
7982
7983   @type lu: L{LogicalUnit}
7984   @param lu: the logical unit for which we check
7985   @type nodenames: list
7986   @param nodenames: the list of nodes on which we should check
7987   @rtype: list
7988   @return: the list of vm-capable nodes
7989
7990   """
7991   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7992   return [name for name in nodenames if name not in vm_nodes]
7993
7994
7995 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7996   """Hypervisor parameter validation.
7997
7998   This function abstract the hypervisor parameter validation to be
7999   used in both instance create and instance modify.
8000
8001   @type lu: L{LogicalUnit}
8002   @param lu: the logical unit for which we check
8003   @type nodenames: list
8004   @param nodenames: the list of nodes on which we should check
8005   @type hvname: string
8006   @param hvname: the name of the hypervisor we should use
8007   @type hvparams: dict
8008   @param hvparams: the parameters which we need to check
8009   @raise errors.OpPrereqError: if the parameters are not valid
8010
8011   """
8012   nodenames = _FilterVmNodes(lu, nodenames)
8013   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8014                                                   hvname,
8015                                                   hvparams)
8016   for node in nodenames:
8017     info = hvinfo[node]
8018     if info.offline:
8019       continue
8020     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8021
8022
8023 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8024   """OS parameters validation.
8025
8026   @type lu: L{LogicalUnit}
8027   @param lu: the logical unit for which we check
8028   @type required: boolean
8029   @param required: whether the validation should fail if the OS is not
8030       found
8031   @type nodenames: list
8032   @param nodenames: the list of nodes on which we should check
8033   @type osname: string
8034   @param osname: the name of the hypervisor we should use
8035   @type osparams: dict
8036   @param osparams: the parameters which we need to check
8037   @raise errors.OpPrereqError: if the parameters are not valid
8038
8039   """
8040   nodenames = _FilterVmNodes(lu, nodenames)
8041   result = lu.rpc.call_os_validate(required, nodenames, osname,
8042                                    [constants.OS_VALIDATE_PARAMETERS],
8043                                    osparams)
8044   for node, nres in result.items():
8045     # we don't check for offline cases since this should be run only
8046     # against the master node and/or an instance's nodes
8047     nres.Raise("OS Parameters validation failed on node %s" % node)
8048     if not nres.payload:
8049       lu.LogInfo("OS %s not found on node %s, validation skipped",
8050                  osname, node)
8051
8052
8053 class LUInstanceCreate(LogicalUnit):
8054   """Create an instance.
8055
8056   """
8057   HPATH = "instance-add"
8058   HTYPE = constants.HTYPE_INSTANCE
8059   REQ_BGL = False
8060
8061   def CheckArguments(self):
8062     """Check arguments.
8063
8064     """
8065     # do not require name_check to ease forward/backward compatibility
8066     # for tools
8067     if self.op.no_install and self.op.start:
8068       self.LogInfo("No-installation mode selected, disabling startup")
8069       self.op.start = False
8070     # validate/normalize the instance name
8071     self.op.instance_name = \
8072       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8073
8074     if self.op.ip_check and not self.op.name_check:
8075       # TODO: make the ip check more flexible and not depend on the name check
8076       raise errors.OpPrereqError("Cannot do IP address check without a name"
8077                                  " check", errors.ECODE_INVAL)
8078
8079     # check nics' parameter names
8080     for nic in self.op.nics:
8081       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8082
8083     # check disks. parameter names and consistent adopt/no-adopt strategy
8084     has_adopt = has_no_adopt = False
8085     for disk in self.op.disks:
8086       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8087       if constants.IDISK_ADOPT in disk:
8088         has_adopt = True
8089       else:
8090         has_no_adopt = True
8091     if has_adopt and has_no_adopt:
8092       raise errors.OpPrereqError("Either all disks are adopted or none is",
8093                                  errors.ECODE_INVAL)
8094     if has_adopt:
8095       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8096         raise errors.OpPrereqError("Disk adoption is not supported for the"
8097                                    " '%s' disk template" %
8098                                    self.op.disk_template,
8099                                    errors.ECODE_INVAL)
8100       if self.op.iallocator is not None:
8101         raise errors.OpPrereqError("Disk adoption not allowed with an"
8102                                    " iallocator script", errors.ECODE_INVAL)
8103       if self.op.mode == constants.INSTANCE_IMPORT:
8104         raise errors.OpPrereqError("Disk adoption not allowed for"
8105                                    " instance import", errors.ECODE_INVAL)
8106     else:
8107       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8108         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8109                                    " but no 'adopt' parameter given" %
8110                                    self.op.disk_template,
8111                                    errors.ECODE_INVAL)
8112
8113     self.adopt_disks = has_adopt
8114
8115     # instance name verification
8116     if self.op.name_check:
8117       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8118       self.op.instance_name = self.hostname1.name
8119       # used in CheckPrereq for ip ping check
8120       self.check_ip = self.hostname1.ip
8121     else:
8122       self.check_ip = None
8123
8124     # file storage checks
8125     if (self.op.file_driver and
8126         not self.op.file_driver in constants.FILE_DRIVER):
8127       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8128                                  self.op.file_driver, errors.ECODE_INVAL)
8129
8130     if self.op.disk_template == constants.DT_FILE:
8131       opcodes.RequireFileStorage()
8132     elif self.op.disk_template == constants.DT_SHARED_FILE:
8133       opcodes.RequireSharedFileStorage()
8134
8135     ### Node/iallocator related checks
8136     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8137
8138     if self.op.pnode is not None:
8139       if self.op.disk_template in constants.DTS_INT_MIRROR:
8140         if self.op.snode is None:
8141           raise errors.OpPrereqError("The networked disk templates need"
8142                                      " a mirror node", errors.ECODE_INVAL)
8143       elif self.op.snode:
8144         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8145                         " template")
8146         self.op.snode = None
8147
8148     self._cds = _GetClusterDomainSecret()
8149
8150     if self.op.mode == constants.INSTANCE_IMPORT:
8151       # On import force_variant must be True, because if we forced it at
8152       # initial install, our only chance when importing it back is that it
8153       # works again!
8154       self.op.force_variant = True
8155
8156       if self.op.no_install:
8157         self.LogInfo("No-installation mode has no effect during import")
8158
8159     elif self.op.mode == constants.INSTANCE_CREATE:
8160       if self.op.os_type is None:
8161         raise errors.OpPrereqError("No guest OS specified",
8162                                    errors.ECODE_INVAL)
8163       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8164         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8165                                    " installation" % self.op.os_type,
8166                                    errors.ECODE_STATE)
8167       if self.op.disk_template is None:
8168         raise errors.OpPrereqError("No disk template specified",
8169                                    errors.ECODE_INVAL)
8170
8171     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8172       # Check handshake to ensure both clusters have the same domain secret
8173       src_handshake = self.op.source_handshake
8174       if not src_handshake:
8175         raise errors.OpPrereqError("Missing source handshake",
8176                                    errors.ECODE_INVAL)
8177
8178       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8179                                                            src_handshake)
8180       if errmsg:
8181         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8182                                    errors.ECODE_INVAL)
8183
8184       # Load and check source CA
8185       self.source_x509_ca_pem = self.op.source_x509_ca
8186       if not self.source_x509_ca_pem:
8187         raise errors.OpPrereqError("Missing source X509 CA",
8188                                    errors.ECODE_INVAL)
8189
8190       try:
8191         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8192                                                     self._cds)
8193       except OpenSSL.crypto.Error, err:
8194         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8195                                    (err, ), errors.ECODE_INVAL)
8196
8197       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8198       if errcode is not None:
8199         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8200                                    errors.ECODE_INVAL)
8201
8202       self.source_x509_ca = cert
8203
8204       src_instance_name = self.op.source_instance_name
8205       if not src_instance_name:
8206         raise errors.OpPrereqError("Missing source instance name",
8207                                    errors.ECODE_INVAL)
8208
8209       self.source_instance_name = \
8210           netutils.GetHostname(name=src_instance_name).name
8211
8212     else:
8213       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8214                                  self.op.mode, errors.ECODE_INVAL)
8215
8216   def ExpandNames(self):
8217     """ExpandNames for CreateInstance.
8218
8219     Figure out the right locks for instance creation.
8220
8221     """
8222     self.needed_locks = {}
8223
8224     instance_name = self.op.instance_name
8225     # this is just a preventive check, but someone might still add this
8226     # instance in the meantime, and creation will fail at lock-add time
8227     if instance_name in self.cfg.GetInstanceList():
8228       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8229                                  instance_name, errors.ECODE_EXISTS)
8230
8231     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8232
8233     if self.op.iallocator:
8234       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8235     else:
8236       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8237       nodelist = [self.op.pnode]
8238       if self.op.snode is not None:
8239         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8240         nodelist.append(self.op.snode)
8241       self.needed_locks[locking.LEVEL_NODE] = nodelist
8242
8243     # in case of import lock the source node too
8244     if self.op.mode == constants.INSTANCE_IMPORT:
8245       src_node = self.op.src_node
8246       src_path = self.op.src_path
8247
8248       if src_path is None:
8249         self.op.src_path = src_path = self.op.instance_name
8250
8251       if src_node is None:
8252         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8253         self.op.src_node = None
8254         if os.path.isabs(src_path):
8255           raise errors.OpPrereqError("Importing an instance from an absolute"
8256                                      " path requires a source node option",
8257                                      errors.ECODE_INVAL)
8258       else:
8259         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8260         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8261           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8262         if not os.path.isabs(src_path):
8263           self.op.src_path = src_path = \
8264             utils.PathJoin(constants.EXPORT_DIR, src_path)
8265
8266   def _RunAllocator(self):
8267     """Run the allocator based on input opcode.
8268
8269     """
8270     nics = [n.ToDict() for n in self.nics]
8271     ial = IAllocator(self.cfg, self.rpc,
8272                      mode=constants.IALLOCATOR_MODE_ALLOC,
8273                      name=self.op.instance_name,
8274                      disk_template=self.op.disk_template,
8275                      tags=self.op.tags,
8276                      os=self.op.os_type,
8277                      vcpus=self.be_full[constants.BE_VCPUS],
8278                      memory=self.be_full[constants.BE_MEMORY],
8279                      disks=self.disks,
8280                      nics=nics,
8281                      hypervisor=self.op.hypervisor,
8282                      )
8283
8284     ial.Run(self.op.iallocator)
8285
8286     if not ial.success:
8287       raise errors.OpPrereqError("Can't compute nodes using"
8288                                  " iallocator '%s': %s" %
8289                                  (self.op.iallocator, ial.info),
8290                                  errors.ECODE_NORES)
8291     if len(ial.result) != ial.required_nodes:
8292       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8293                                  " of nodes (%s), required %s" %
8294                                  (self.op.iallocator, len(ial.result),
8295                                   ial.required_nodes), errors.ECODE_FAULT)
8296     self.op.pnode = ial.result[0]
8297     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8298                  self.op.instance_name, self.op.iallocator,
8299                  utils.CommaJoin(ial.result))
8300     if ial.required_nodes == 2:
8301       self.op.snode = ial.result[1]
8302
8303   def BuildHooksEnv(self):
8304     """Build hooks env.
8305
8306     This runs on master, primary and secondary nodes of the instance.
8307
8308     """
8309     env = {
8310       "ADD_MODE": self.op.mode,
8311       }
8312     if self.op.mode == constants.INSTANCE_IMPORT:
8313       env["SRC_NODE"] = self.op.src_node
8314       env["SRC_PATH"] = self.op.src_path
8315       env["SRC_IMAGES"] = self.src_images
8316
8317     env.update(_BuildInstanceHookEnv(
8318       name=self.op.instance_name,
8319       primary_node=self.op.pnode,
8320       secondary_nodes=self.secondaries,
8321       status=self.op.start,
8322       os_type=self.op.os_type,
8323       memory=self.be_full[constants.BE_MEMORY],
8324       vcpus=self.be_full[constants.BE_VCPUS],
8325       nics=_NICListToTuple(self, self.nics),
8326       disk_template=self.op.disk_template,
8327       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8328              for d in self.disks],
8329       bep=self.be_full,
8330       hvp=self.hv_full,
8331       hypervisor_name=self.op.hypervisor,
8332       tags=self.op.tags,
8333     ))
8334
8335     return env
8336
8337   def BuildHooksNodes(self):
8338     """Build hooks nodes.
8339
8340     """
8341     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8342     return nl, nl
8343
8344   def _ReadExportInfo(self):
8345     """Reads the export information from disk.
8346
8347     It will override the opcode source node and path with the actual
8348     information, if these two were not specified before.
8349
8350     @return: the export information
8351
8352     """
8353     assert self.op.mode == constants.INSTANCE_IMPORT
8354
8355     src_node = self.op.src_node
8356     src_path = self.op.src_path
8357
8358     if src_node is None:
8359       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8360       exp_list = self.rpc.call_export_list(locked_nodes)
8361       found = False
8362       for node in exp_list:
8363         if exp_list[node].fail_msg:
8364           continue
8365         if src_path in exp_list[node].payload:
8366           found = True
8367           self.op.src_node = src_node = node
8368           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8369                                                        src_path)
8370           break
8371       if not found:
8372         raise errors.OpPrereqError("No export found for relative path %s" %
8373                                     src_path, errors.ECODE_INVAL)
8374
8375     _CheckNodeOnline(self, src_node)
8376     result = self.rpc.call_export_info(src_node, src_path)
8377     result.Raise("No export or invalid export found in dir %s" % src_path)
8378
8379     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8380     if not export_info.has_section(constants.INISECT_EXP):
8381       raise errors.ProgrammerError("Corrupted export config",
8382                                    errors.ECODE_ENVIRON)
8383
8384     ei_version = export_info.get(constants.INISECT_EXP, "version")
8385     if (int(ei_version) != constants.EXPORT_VERSION):
8386       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8387                                  (ei_version, constants.EXPORT_VERSION),
8388                                  errors.ECODE_ENVIRON)
8389     return export_info
8390
8391   def _ReadExportParams(self, einfo):
8392     """Use export parameters as defaults.
8393
8394     In case the opcode doesn't specify (as in override) some instance
8395     parameters, then try to use them from the export information, if
8396     that declares them.
8397
8398     """
8399     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8400
8401     if self.op.disk_template is None:
8402       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8403         self.op.disk_template = einfo.get(constants.INISECT_INS,
8404                                           "disk_template")
8405       else:
8406         raise errors.OpPrereqError("No disk template specified and the export"
8407                                    " is missing the disk_template information",
8408                                    errors.ECODE_INVAL)
8409
8410     if not self.op.disks:
8411       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8412         disks = []
8413         # TODO: import the disk iv_name too
8414         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8415           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8416           disks.append({constants.IDISK_SIZE: disk_sz})
8417         self.op.disks = disks
8418       else:
8419         raise errors.OpPrereqError("No disk info specified and the export"
8420                                    " is missing the disk information",
8421                                    errors.ECODE_INVAL)
8422
8423     if (not self.op.nics and
8424         einfo.has_option(constants.INISECT_INS, "nic_count")):
8425       nics = []
8426       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8427         ndict = {}
8428         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8429           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8430           ndict[name] = v
8431         nics.append(ndict)
8432       self.op.nics = nics
8433
8434     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8435       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8436
8437     if (self.op.hypervisor is None and
8438         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8439       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8440
8441     if einfo.has_section(constants.INISECT_HYP):
8442       # use the export parameters but do not override the ones
8443       # specified by the user
8444       for name, value in einfo.items(constants.INISECT_HYP):
8445         if name not in self.op.hvparams:
8446           self.op.hvparams[name] = value
8447
8448     if einfo.has_section(constants.INISECT_BEP):
8449       # use the parameters, without overriding
8450       for name, value in einfo.items(constants.INISECT_BEP):
8451         if name not in self.op.beparams:
8452           self.op.beparams[name] = value
8453     else:
8454       # try to read the parameters old style, from the main section
8455       for name in constants.BES_PARAMETERS:
8456         if (name not in self.op.beparams and
8457             einfo.has_option(constants.INISECT_INS, name)):
8458           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8459
8460     if einfo.has_section(constants.INISECT_OSP):
8461       # use the parameters, without overriding
8462       for name, value in einfo.items(constants.INISECT_OSP):
8463         if name not in self.op.osparams:
8464           self.op.osparams[name] = value
8465
8466   def _RevertToDefaults(self, cluster):
8467     """Revert the instance parameters to the default values.
8468
8469     """
8470     # hvparams
8471     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8472     for name in self.op.hvparams.keys():
8473       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8474         del self.op.hvparams[name]
8475     # beparams
8476     be_defs = cluster.SimpleFillBE({})
8477     for name in self.op.beparams.keys():
8478       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8479         del self.op.beparams[name]
8480     # nic params
8481     nic_defs = cluster.SimpleFillNIC({})
8482     for nic in self.op.nics:
8483       for name in constants.NICS_PARAMETERS:
8484         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8485           del nic[name]
8486     # osparams
8487     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8488     for name in self.op.osparams.keys():
8489       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8490         del self.op.osparams[name]
8491
8492   def _CalculateFileStorageDir(self):
8493     """Calculate final instance file storage dir.
8494
8495     """
8496     # file storage dir calculation/check
8497     self.instance_file_storage_dir = None
8498     if self.op.disk_template in constants.DTS_FILEBASED:
8499       # build the full file storage dir path
8500       joinargs = []
8501
8502       if self.op.disk_template == constants.DT_SHARED_FILE:
8503         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8504       else:
8505         get_fsd_fn = self.cfg.GetFileStorageDir
8506
8507       cfg_storagedir = get_fsd_fn()
8508       if not cfg_storagedir:
8509         raise errors.OpPrereqError("Cluster file storage dir not defined")
8510       joinargs.append(cfg_storagedir)
8511
8512       if self.op.file_storage_dir is not None:
8513         joinargs.append(self.op.file_storage_dir)
8514
8515       joinargs.append(self.op.instance_name)
8516
8517       # pylint: disable-msg=W0142
8518       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8519
8520   def CheckPrereq(self):
8521     """Check prerequisites.
8522
8523     """
8524     self._CalculateFileStorageDir()
8525
8526     if self.op.mode == constants.INSTANCE_IMPORT:
8527       export_info = self._ReadExportInfo()
8528       self._ReadExportParams(export_info)
8529
8530     if (not self.cfg.GetVGName() and
8531         self.op.disk_template not in constants.DTS_NOT_LVM):
8532       raise errors.OpPrereqError("Cluster does not support lvm-based"
8533                                  " instances", errors.ECODE_STATE)
8534
8535     if self.op.hypervisor is None:
8536       self.op.hypervisor = self.cfg.GetHypervisorType()
8537
8538     cluster = self.cfg.GetClusterInfo()
8539     enabled_hvs = cluster.enabled_hypervisors
8540     if self.op.hypervisor not in enabled_hvs:
8541       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8542                                  " cluster (%s)" % (self.op.hypervisor,
8543                                   ",".join(enabled_hvs)),
8544                                  errors.ECODE_STATE)
8545
8546     # Check tag validity
8547     for tag in self.op.tags:
8548       objects.TaggableObject.ValidateTag(tag)
8549
8550     # check hypervisor parameter syntax (locally)
8551     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8552     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8553                                       self.op.hvparams)
8554     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8555     hv_type.CheckParameterSyntax(filled_hvp)
8556     self.hv_full = filled_hvp
8557     # check that we don't specify global parameters on an instance
8558     _CheckGlobalHvParams(self.op.hvparams)
8559
8560     # fill and remember the beparams dict
8561     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8562     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8563
8564     # build os parameters
8565     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8566
8567     # now that hvp/bep are in final format, let's reset to defaults,
8568     # if told to do so
8569     if self.op.identify_defaults:
8570       self._RevertToDefaults(cluster)
8571
8572     # NIC buildup
8573     self.nics = []
8574     for idx, nic in enumerate(self.op.nics):
8575       nic_mode_req = nic.get(constants.INIC_MODE, None)
8576       nic_mode = nic_mode_req
8577       if nic_mode is None:
8578         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8579
8580       # in routed mode, for the first nic, the default ip is 'auto'
8581       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8582         default_ip_mode = constants.VALUE_AUTO
8583       else:
8584         default_ip_mode = constants.VALUE_NONE
8585
8586       # ip validity checks
8587       ip = nic.get(constants.INIC_IP, default_ip_mode)
8588       if ip is None or ip.lower() == constants.VALUE_NONE:
8589         nic_ip = None
8590       elif ip.lower() == constants.VALUE_AUTO:
8591         if not self.op.name_check:
8592           raise errors.OpPrereqError("IP address set to auto but name checks"
8593                                      " have been skipped",
8594                                      errors.ECODE_INVAL)
8595         nic_ip = self.hostname1.ip
8596       else:
8597         if not netutils.IPAddress.IsValid(ip):
8598           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8599                                      errors.ECODE_INVAL)
8600         nic_ip = ip
8601
8602       # TODO: check the ip address for uniqueness
8603       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8604         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8605                                    errors.ECODE_INVAL)
8606
8607       # MAC address verification
8608       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8609       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8610         mac = utils.NormalizeAndValidateMac(mac)
8611
8612         try:
8613           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8614         except errors.ReservationError:
8615           raise errors.OpPrereqError("MAC address %s already in use"
8616                                      " in cluster" % mac,
8617                                      errors.ECODE_NOTUNIQUE)
8618
8619       #  Build nic parameters
8620       link = nic.get(constants.INIC_LINK, None)
8621       nicparams = {}
8622       if nic_mode_req:
8623         nicparams[constants.NIC_MODE] = nic_mode_req
8624       if link:
8625         nicparams[constants.NIC_LINK] = link
8626
8627       check_params = cluster.SimpleFillNIC(nicparams)
8628       objects.NIC.CheckParameterSyntax(check_params)
8629       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8630
8631     # disk checks/pre-build
8632     default_vg = self.cfg.GetVGName()
8633     self.disks = []
8634     for disk in self.op.disks:
8635       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8636       if mode not in constants.DISK_ACCESS_SET:
8637         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8638                                    mode, errors.ECODE_INVAL)
8639       size = disk.get(constants.IDISK_SIZE, None)
8640       if size is None:
8641         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8642       try:
8643         size = int(size)
8644       except (TypeError, ValueError):
8645         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8646                                    errors.ECODE_INVAL)
8647
8648       data_vg = disk.get(constants.IDISK_VG, default_vg)
8649       new_disk = {
8650         constants.IDISK_SIZE: size,
8651         constants.IDISK_MODE: mode,
8652         constants.IDISK_VG: data_vg,
8653         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8654         }
8655       if constants.IDISK_ADOPT in disk:
8656         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8657       self.disks.append(new_disk)
8658
8659     if self.op.mode == constants.INSTANCE_IMPORT:
8660
8661       # Check that the new instance doesn't have less disks than the export
8662       instance_disks = len(self.disks)
8663       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8664       if instance_disks < export_disks:
8665         raise errors.OpPrereqError("Not enough disks to import."
8666                                    " (instance: %d, export: %d)" %
8667                                    (instance_disks, export_disks),
8668                                    errors.ECODE_INVAL)
8669
8670       disk_images = []
8671       for idx in range(export_disks):
8672         option = "disk%d_dump" % idx
8673         if export_info.has_option(constants.INISECT_INS, option):
8674           # FIXME: are the old os-es, disk sizes, etc. useful?
8675           export_name = export_info.get(constants.INISECT_INS, option)
8676           image = utils.PathJoin(self.op.src_path, export_name)
8677           disk_images.append(image)
8678         else:
8679           disk_images.append(False)
8680
8681       self.src_images = disk_images
8682
8683       old_name = export_info.get(constants.INISECT_INS, "name")
8684       try:
8685         exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8686       except (TypeError, ValueError), err:
8687         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8688                                    " an integer: %s" % str(err),
8689                                    errors.ECODE_STATE)
8690       if self.op.instance_name == old_name:
8691         for idx, nic in enumerate(self.nics):
8692           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8693             nic_mac_ini = "nic%d_mac" % idx
8694             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8695
8696     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8697
8698     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8699     if self.op.ip_check:
8700       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8701         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8702                                    (self.check_ip, self.op.instance_name),
8703                                    errors.ECODE_NOTUNIQUE)
8704
8705     #### mac address generation
8706     # By generating here the mac address both the allocator and the hooks get
8707     # the real final mac address rather than the 'auto' or 'generate' value.
8708     # There is a race condition between the generation and the instance object
8709     # creation, which means that we know the mac is valid now, but we're not
8710     # sure it will be when we actually add the instance. If things go bad
8711     # adding the instance will abort because of a duplicate mac, and the
8712     # creation job will fail.
8713     for nic in self.nics:
8714       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8715         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8716
8717     #### allocator run
8718
8719     if self.op.iallocator is not None:
8720       self._RunAllocator()
8721
8722     #### node related checks
8723
8724     # check primary node
8725     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8726     assert self.pnode is not None, \
8727       "Cannot retrieve locked node %s" % self.op.pnode
8728     if pnode.offline:
8729       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8730                                  pnode.name, errors.ECODE_STATE)
8731     if pnode.drained:
8732       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8733                                  pnode.name, errors.ECODE_STATE)
8734     if not pnode.vm_capable:
8735       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8736                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8737
8738     self.secondaries = []
8739
8740     # mirror node verification
8741     if self.op.disk_template in constants.DTS_INT_MIRROR:
8742       if self.op.snode == pnode.name:
8743         raise errors.OpPrereqError("The secondary node cannot be the"
8744                                    " primary node", errors.ECODE_INVAL)
8745       _CheckNodeOnline(self, self.op.snode)
8746       _CheckNodeNotDrained(self, self.op.snode)
8747       _CheckNodeVmCapable(self, self.op.snode)
8748       self.secondaries.append(self.op.snode)
8749
8750     nodenames = [pnode.name] + self.secondaries
8751
8752     if not self.adopt_disks:
8753       # Check lv size requirements, if not adopting
8754       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8755       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8756
8757     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8758       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8759                                 disk[constants.IDISK_ADOPT])
8760                      for disk in self.disks])
8761       if len(all_lvs) != len(self.disks):
8762         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8763                                    errors.ECODE_INVAL)
8764       for lv_name in all_lvs:
8765         try:
8766           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8767           # to ReserveLV uses the same syntax
8768           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8769         except errors.ReservationError:
8770           raise errors.OpPrereqError("LV named %s used by another instance" %
8771                                      lv_name, errors.ECODE_NOTUNIQUE)
8772
8773       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8774       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8775
8776       node_lvs = self.rpc.call_lv_list([pnode.name],
8777                                        vg_names.payload.keys())[pnode.name]
8778       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8779       node_lvs = node_lvs.payload
8780
8781       delta = all_lvs.difference(node_lvs.keys())
8782       if delta:
8783         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8784                                    utils.CommaJoin(delta),
8785                                    errors.ECODE_INVAL)
8786       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8787       if online_lvs:
8788         raise errors.OpPrereqError("Online logical volumes found, cannot"
8789                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8790                                    errors.ECODE_STATE)
8791       # update the size of disk based on what is found
8792       for dsk in self.disks:
8793         dsk[constants.IDISK_SIZE] = \
8794           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8795                                         dsk[constants.IDISK_ADOPT])][0]))
8796
8797     elif self.op.disk_template == constants.DT_BLOCK:
8798       # Normalize and de-duplicate device paths
8799       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8800                        for disk in self.disks])
8801       if len(all_disks) != len(self.disks):
8802         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8803                                    errors.ECODE_INVAL)
8804       baddisks = [d for d in all_disks
8805                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8806       if baddisks:
8807         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8808                                    " cannot be adopted" %
8809                                    (", ".join(baddisks),
8810                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8811                                    errors.ECODE_INVAL)
8812
8813       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8814                                             list(all_disks))[pnode.name]
8815       node_disks.Raise("Cannot get block device information from node %s" %
8816                        pnode.name)
8817       node_disks = node_disks.payload
8818       delta = all_disks.difference(node_disks.keys())
8819       if delta:
8820         raise errors.OpPrereqError("Missing block device(s): %s" %
8821                                    utils.CommaJoin(delta),
8822                                    errors.ECODE_INVAL)
8823       for dsk in self.disks:
8824         dsk[constants.IDISK_SIZE] = \
8825           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8826
8827     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8828
8829     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8830     # check OS parameters (remotely)
8831     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8832
8833     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8834
8835     # memory check on primary node
8836     if self.op.start:
8837       _CheckNodeFreeMemory(self, self.pnode.name,
8838                            "creating instance %s" % self.op.instance_name,
8839                            self.be_full[constants.BE_MEMORY],
8840                            self.op.hypervisor)
8841
8842     self.dry_run_result = list(nodenames)
8843
8844   def Exec(self, feedback_fn):
8845     """Create and add the instance to the cluster.
8846
8847     """
8848     instance = self.op.instance_name
8849     pnode_name = self.pnode.name
8850
8851     ht_kind = self.op.hypervisor
8852     if ht_kind in constants.HTS_REQ_PORT:
8853       network_port = self.cfg.AllocatePort()
8854     else:
8855       network_port = None
8856
8857     disks = _GenerateDiskTemplate(self,
8858                                   self.op.disk_template,
8859                                   instance, pnode_name,
8860                                   self.secondaries,
8861                                   self.disks,
8862                                   self.instance_file_storage_dir,
8863                                   self.op.file_driver,
8864                                   0,
8865                                   feedback_fn)
8866
8867     iobj = objects.Instance(name=instance, os=self.op.os_type,
8868                             primary_node=pnode_name,
8869                             nics=self.nics, disks=disks,
8870                             disk_template=self.op.disk_template,
8871                             admin_up=False,
8872                             network_port=network_port,
8873                             beparams=self.op.beparams,
8874                             hvparams=self.op.hvparams,
8875                             hypervisor=self.op.hypervisor,
8876                             osparams=self.op.osparams,
8877                             )
8878
8879     if self.op.tags:
8880       for tag in self.op.tags:
8881         iobj.AddTag(tag)
8882
8883     if self.adopt_disks:
8884       if self.op.disk_template == constants.DT_PLAIN:
8885         # rename LVs to the newly-generated names; we need to construct
8886         # 'fake' LV disks with the old data, plus the new unique_id
8887         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8888         rename_to = []
8889         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8890           rename_to.append(t_dsk.logical_id)
8891           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8892           self.cfg.SetDiskID(t_dsk, pnode_name)
8893         result = self.rpc.call_blockdev_rename(pnode_name,
8894                                                zip(tmp_disks, rename_to))
8895         result.Raise("Failed to rename adoped LVs")
8896     else:
8897       feedback_fn("* creating instance disks...")
8898       try:
8899         _CreateDisks(self, iobj)
8900       except errors.OpExecError:
8901         self.LogWarning("Device creation failed, reverting...")
8902         try:
8903           _RemoveDisks(self, iobj)
8904         finally:
8905           self.cfg.ReleaseDRBDMinors(instance)
8906           raise
8907
8908     feedback_fn("adding instance %s to cluster config" % instance)
8909
8910     self.cfg.AddInstance(iobj, self.proc.GetECId())
8911
8912     # Declare that we don't want to remove the instance lock anymore, as we've
8913     # added the instance to the config
8914     del self.remove_locks[locking.LEVEL_INSTANCE]
8915
8916     if self.op.mode == constants.INSTANCE_IMPORT:
8917       # Release unused nodes
8918       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8919     else:
8920       # Release all nodes
8921       _ReleaseLocks(self, locking.LEVEL_NODE)
8922
8923     disk_abort = False
8924     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8925       feedback_fn("* wiping instance disks...")
8926       try:
8927         _WipeDisks(self, iobj)
8928       except errors.OpExecError, err:
8929         logging.exception("Wiping disks failed")
8930         self.LogWarning("Wiping instance disks failed (%s)", err)
8931         disk_abort = True
8932
8933     if disk_abort:
8934       # Something is already wrong with the disks, don't do anything else
8935       pass
8936     elif self.op.wait_for_sync:
8937       disk_abort = not _WaitForSync(self, iobj)
8938     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8939       # make sure the disks are not degraded (still sync-ing is ok)
8940       feedback_fn("* checking mirrors status")
8941       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8942     else:
8943       disk_abort = False
8944
8945     if disk_abort:
8946       _RemoveDisks(self, iobj)
8947       self.cfg.RemoveInstance(iobj.name)
8948       # Make sure the instance lock gets removed
8949       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8950       raise errors.OpExecError("There are some degraded disks for"
8951                                " this instance")
8952
8953     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8954       if self.op.mode == constants.INSTANCE_CREATE:
8955         if not self.op.no_install:
8956           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8957                         not self.op.wait_for_sync)
8958           if pause_sync:
8959             feedback_fn("* pausing disk sync to install instance OS")
8960             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8961                                                               iobj.disks, True)
8962             for idx, success in enumerate(result.payload):
8963               if not success:
8964                 logging.warn("pause-sync of instance %s for disk %d failed",
8965                              instance, idx)
8966
8967           feedback_fn("* running the instance OS create scripts...")
8968           # FIXME: pass debug option from opcode to backend
8969           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8970                                                  self.op.debug_level)
8971           if pause_sync:
8972             feedback_fn("* resuming disk sync")
8973             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8974                                                               iobj.disks, False)
8975             for idx, success in enumerate(result.payload):
8976               if not success:
8977                 logging.warn("resume-sync of instance %s for disk %d failed",
8978                              instance, idx)
8979
8980           result.Raise("Could not add os for instance %s"
8981                        " on node %s" % (instance, pnode_name))
8982
8983       elif self.op.mode == constants.INSTANCE_IMPORT:
8984         feedback_fn("* running the instance OS import scripts...")
8985
8986         transfers = []
8987
8988         for idx, image in enumerate(self.src_images):
8989           if not image:
8990             continue
8991
8992           # FIXME: pass debug option from opcode to backend
8993           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8994                                              constants.IEIO_FILE, (image, ),
8995                                              constants.IEIO_SCRIPT,
8996                                              (iobj.disks[idx], idx),
8997                                              None)
8998           transfers.append(dt)
8999
9000         import_result = \
9001           masterd.instance.TransferInstanceData(self, feedback_fn,
9002                                                 self.op.src_node, pnode_name,
9003                                                 self.pnode.secondary_ip,
9004                                                 iobj, transfers)
9005         if not compat.all(import_result):
9006           self.LogWarning("Some disks for instance %s on node %s were not"
9007                           " imported successfully" % (instance, pnode_name))
9008
9009       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9010         feedback_fn("* preparing remote import...")
9011         # The source cluster will stop the instance before attempting to make a
9012         # connection. In some cases stopping an instance can take a long time,
9013         # hence the shutdown timeout is added to the connection timeout.
9014         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9015                            self.op.source_shutdown_timeout)
9016         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9017
9018         assert iobj.primary_node == self.pnode.name
9019         disk_results = \
9020           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9021                                         self.source_x509_ca,
9022                                         self._cds, timeouts)
9023         if not compat.all(disk_results):
9024           # TODO: Should the instance still be started, even if some disks
9025           # failed to import (valid for local imports, too)?
9026           self.LogWarning("Some disks for instance %s on node %s were not"
9027                           " imported successfully" % (instance, pnode_name))
9028
9029         # Run rename script on newly imported instance
9030         assert iobj.name == instance
9031         feedback_fn("Running rename script for %s" % instance)
9032         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9033                                                    self.source_instance_name,
9034                                                    self.op.debug_level)
9035         if result.fail_msg:
9036           self.LogWarning("Failed to run rename script for %s on node"
9037                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9038
9039       else:
9040         # also checked in the prereq part
9041         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9042                                      % self.op.mode)
9043
9044     if self.op.start:
9045       iobj.admin_up = True
9046       self.cfg.Update(iobj, feedback_fn)
9047       logging.info("Starting instance %s on node %s", instance, pnode_name)
9048       feedback_fn("* starting instance...")
9049       result = self.rpc.call_instance_start(pnode_name, iobj,
9050                                             None, None, False)
9051       result.Raise("Could not start instance")
9052
9053     return list(iobj.all_nodes)
9054
9055
9056 class LUInstanceConsole(NoHooksLU):
9057   """Connect to an instance's console.
9058
9059   This is somewhat special in that it returns the command line that
9060   you need to run on the master node in order to connect to the
9061   console.
9062
9063   """
9064   REQ_BGL = False
9065
9066   def ExpandNames(self):
9067     self._ExpandAndLockInstance()
9068
9069   def CheckPrereq(self):
9070     """Check prerequisites.
9071
9072     This checks that the instance is in the cluster.
9073
9074     """
9075     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9076     assert self.instance is not None, \
9077       "Cannot retrieve locked instance %s" % self.op.instance_name
9078     _CheckNodeOnline(self, self.instance.primary_node)
9079
9080   def Exec(self, feedback_fn):
9081     """Connect to the console of an instance
9082
9083     """
9084     instance = self.instance
9085     node = instance.primary_node
9086
9087     node_insts = self.rpc.call_instance_list([node],
9088                                              [instance.hypervisor])[node]
9089     node_insts.Raise("Can't get node information from %s" % node)
9090
9091     if instance.name not in node_insts.payload:
9092       if instance.admin_up:
9093         state = constants.INSTST_ERRORDOWN
9094       else:
9095         state = constants.INSTST_ADMINDOWN
9096       raise errors.OpExecError("Instance %s is not running (state %s)" %
9097                                (instance.name, state))
9098
9099     logging.debug("Connecting to console of %s on %s", instance.name, node)
9100
9101     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9102
9103
9104 def _GetInstanceConsole(cluster, instance):
9105   """Returns console information for an instance.
9106
9107   @type cluster: L{objects.Cluster}
9108   @type instance: L{objects.Instance}
9109   @rtype: dict
9110
9111   """
9112   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9113   # beparams and hvparams are passed separately, to avoid editing the
9114   # instance and then saving the defaults in the instance itself.
9115   hvparams = cluster.FillHV(instance)
9116   beparams = cluster.FillBE(instance)
9117   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9118
9119   assert console.instance == instance.name
9120   assert console.Validate()
9121
9122   return console.ToDict()
9123
9124
9125 class LUInstanceReplaceDisks(LogicalUnit):
9126   """Replace the disks of an instance.
9127
9128   """
9129   HPATH = "mirrors-replace"
9130   HTYPE = constants.HTYPE_INSTANCE
9131   REQ_BGL = False
9132
9133   def CheckArguments(self):
9134     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9135                                   self.op.iallocator)
9136
9137   def ExpandNames(self):
9138     self._ExpandAndLockInstance()
9139
9140     assert locking.LEVEL_NODE not in self.needed_locks
9141     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9142
9143     assert self.op.iallocator is None or self.op.remote_node is None, \
9144       "Conflicting options"
9145
9146     if self.op.remote_node is not None:
9147       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9148
9149       # Warning: do not remove the locking of the new secondary here
9150       # unless DRBD8.AddChildren is changed to work in parallel;
9151       # currently it doesn't since parallel invocations of
9152       # FindUnusedMinor will conflict
9153       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9154       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9155     else:
9156       self.needed_locks[locking.LEVEL_NODE] = []
9157       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9158
9159       if self.op.iallocator is not None:
9160         # iallocator will select a new node in the same group
9161         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9162
9163     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9164                                    self.op.iallocator, self.op.remote_node,
9165                                    self.op.disks, False, self.op.early_release)
9166
9167     self.tasklets = [self.replacer]
9168
9169   def DeclareLocks(self, level):
9170     if level == locking.LEVEL_NODEGROUP:
9171       assert self.op.remote_node is None
9172       assert self.op.iallocator is not None
9173       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9174
9175       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9176       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9177         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9178
9179     elif level == locking.LEVEL_NODE:
9180       if self.op.iallocator is not None:
9181         assert self.op.remote_node is None
9182         assert not self.needed_locks[locking.LEVEL_NODE]
9183
9184         # Lock member nodes of all locked groups
9185         self.needed_locks[locking.LEVEL_NODE] = [node_name
9186           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9187           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9188       else:
9189         self._LockInstancesNodes()
9190
9191   def BuildHooksEnv(self):
9192     """Build hooks env.
9193
9194     This runs on the master, the primary and all the secondaries.
9195
9196     """
9197     instance = self.replacer.instance
9198     env = {
9199       "MODE": self.op.mode,
9200       "NEW_SECONDARY": self.op.remote_node,
9201       "OLD_SECONDARY": instance.secondary_nodes[0],
9202       }
9203     env.update(_BuildInstanceHookEnvByObject(self, instance))
9204     return env
9205
9206   def BuildHooksNodes(self):
9207     """Build hooks nodes.
9208
9209     """
9210     instance = self.replacer.instance
9211     nl = [
9212       self.cfg.GetMasterNode(),
9213       instance.primary_node,
9214       ]
9215     if self.op.remote_node is not None:
9216       nl.append(self.op.remote_node)
9217     return nl, nl
9218
9219   def CheckPrereq(self):
9220     """Check prerequisites.
9221
9222     """
9223     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9224             self.op.iallocator is None)
9225
9226     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9227     if owned_groups:
9228       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9229
9230     return LogicalUnit.CheckPrereq(self)
9231
9232
9233 class TLReplaceDisks(Tasklet):
9234   """Replaces disks for an instance.
9235
9236   Note: Locking is not within the scope of this class.
9237
9238   """
9239   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9240                disks, delay_iallocator, early_release):
9241     """Initializes this class.
9242
9243     """
9244     Tasklet.__init__(self, lu)
9245
9246     # Parameters
9247     self.instance_name = instance_name
9248     self.mode = mode
9249     self.iallocator_name = iallocator_name
9250     self.remote_node = remote_node
9251     self.disks = disks
9252     self.delay_iallocator = delay_iallocator
9253     self.early_release = early_release
9254
9255     # Runtime data
9256     self.instance = None
9257     self.new_node = None
9258     self.target_node = None
9259     self.other_node = None
9260     self.remote_node_info = None
9261     self.node_secondary_ip = None
9262
9263   @staticmethod
9264   def CheckArguments(mode, remote_node, iallocator):
9265     """Helper function for users of this class.
9266
9267     """
9268     # check for valid parameter combination
9269     if mode == constants.REPLACE_DISK_CHG:
9270       if remote_node is None and iallocator is None:
9271         raise errors.OpPrereqError("When changing the secondary either an"
9272                                    " iallocator script must be used or the"
9273                                    " new node given", errors.ECODE_INVAL)
9274
9275       if remote_node is not None and iallocator is not None:
9276         raise errors.OpPrereqError("Give either the iallocator or the new"
9277                                    " secondary, not both", errors.ECODE_INVAL)
9278
9279     elif remote_node is not None or iallocator is not None:
9280       # Not replacing the secondary
9281       raise errors.OpPrereqError("The iallocator and new node options can"
9282                                  " only be used when changing the"
9283                                  " secondary node", errors.ECODE_INVAL)
9284
9285   @staticmethod
9286   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9287     """Compute a new secondary node using an IAllocator.
9288
9289     """
9290     ial = IAllocator(lu.cfg, lu.rpc,
9291                      mode=constants.IALLOCATOR_MODE_RELOC,
9292                      name=instance_name,
9293                      relocate_from=list(relocate_from))
9294
9295     ial.Run(iallocator_name)
9296
9297     if not ial.success:
9298       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9299                                  " %s" % (iallocator_name, ial.info),
9300                                  errors.ECODE_NORES)
9301
9302     if len(ial.result) != ial.required_nodes:
9303       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9304                                  " of nodes (%s), required %s" %
9305                                  (iallocator_name,
9306                                   len(ial.result), ial.required_nodes),
9307                                  errors.ECODE_FAULT)
9308
9309     remote_node_name = ial.result[0]
9310
9311     lu.LogInfo("Selected new secondary for instance '%s': %s",
9312                instance_name, remote_node_name)
9313
9314     return remote_node_name
9315
9316   def _FindFaultyDisks(self, node_name):
9317     """Wrapper for L{_FindFaultyInstanceDisks}.
9318
9319     """
9320     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9321                                     node_name, True)
9322
9323   def _CheckDisksActivated(self, instance):
9324     """Checks if the instance disks are activated.
9325
9326     @param instance: The instance to check disks
9327     @return: True if they are activated, False otherwise
9328
9329     """
9330     nodes = instance.all_nodes
9331
9332     for idx, dev in enumerate(instance.disks):
9333       for node in nodes:
9334         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9335         self.cfg.SetDiskID(dev, node)
9336
9337         result = self.rpc.call_blockdev_find(node, dev)
9338
9339         if result.offline:
9340           continue
9341         elif result.fail_msg or not result.payload:
9342           return False
9343
9344     return True
9345
9346   def CheckPrereq(self):
9347     """Check prerequisites.
9348
9349     This checks that the instance is in the cluster.
9350
9351     """
9352     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9353     assert instance is not None, \
9354       "Cannot retrieve locked instance %s" % self.instance_name
9355
9356     if instance.disk_template != constants.DT_DRBD8:
9357       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9358                                  " instances", errors.ECODE_INVAL)
9359
9360     if len(instance.secondary_nodes) != 1:
9361       raise errors.OpPrereqError("The instance has a strange layout,"
9362                                  " expected one secondary but found %d" %
9363                                  len(instance.secondary_nodes),
9364                                  errors.ECODE_FAULT)
9365
9366     if not self.delay_iallocator:
9367       self._CheckPrereq2()
9368
9369   def _CheckPrereq2(self):
9370     """Check prerequisites, second part.
9371
9372     This function should always be part of CheckPrereq. It was separated and is
9373     now called from Exec because during node evacuation iallocator was only
9374     called with an unmodified cluster model, not taking planned changes into
9375     account.
9376
9377     """
9378     instance = self.instance
9379     secondary_node = instance.secondary_nodes[0]
9380
9381     if self.iallocator_name is None:
9382       remote_node = self.remote_node
9383     else:
9384       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9385                                        instance.name, instance.secondary_nodes)
9386
9387     if remote_node is None:
9388       self.remote_node_info = None
9389     else:
9390       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9391              "Remote node '%s' is not locked" % remote_node
9392
9393       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9394       assert self.remote_node_info is not None, \
9395         "Cannot retrieve locked node %s" % remote_node
9396
9397     if remote_node == self.instance.primary_node:
9398       raise errors.OpPrereqError("The specified node is the primary node of"
9399                                  " the instance", errors.ECODE_INVAL)
9400
9401     if remote_node == secondary_node:
9402       raise errors.OpPrereqError("The specified node is already the"
9403                                  " secondary node of the instance",
9404                                  errors.ECODE_INVAL)
9405
9406     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9407                                     constants.REPLACE_DISK_CHG):
9408       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9409                                  errors.ECODE_INVAL)
9410
9411     if self.mode == constants.REPLACE_DISK_AUTO:
9412       if not self._CheckDisksActivated(instance):
9413         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9414                                    " first" % self.instance_name,
9415                                    errors.ECODE_STATE)
9416       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9417       faulty_secondary = self._FindFaultyDisks(secondary_node)
9418
9419       if faulty_primary and faulty_secondary:
9420         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9421                                    " one node and can not be repaired"
9422                                    " automatically" % self.instance_name,
9423                                    errors.ECODE_STATE)
9424
9425       if faulty_primary:
9426         self.disks = faulty_primary
9427         self.target_node = instance.primary_node
9428         self.other_node = secondary_node
9429         check_nodes = [self.target_node, self.other_node]
9430       elif faulty_secondary:
9431         self.disks = faulty_secondary
9432         self.target_node = secondary_node
9433         self.other_node = instance.primary_node
9434         check_nodes = [self.target_node, self.other_node]
9435       else:
9436         self.disks = []
9437         check_nodes = []
9438
9439     else:
9440       # Non-automatic modes
9441       if self.mode == constants.REPLACE_DISK_PRI:
9442         self.target_node = instance.primary_node
9443         self.other_node = secondary_node
9444         check_nodes = [self.target_node, self.other_node]
9445
9446       elif self.mode == constants.REPLACE_DISK_SEC:
9447         self.target_node = secondary_node
9448         self.other_node = instance.primary_node
9449         check_nodes = [self.target_node, self.other_node]
9450
9451       elif self.mode == constants.REPLACE_DISK_CHG:
9452         self.new_node = remote_node
9453         self.other_node = instance.primary_node
9454         self.target_node = secondary_node
9455         check_nodes = [self.new_node, self.other_node]
9456
9457         _CheckNodeNotDrained(self.lu, remote_node)
9458         _CheckNodeVmCapable(self.lu, remote_node)
9459
9460         old_node_info = self.cfg.GetNodeInfo(secondary_node)
9461         assert old_node_info is not None
9462         if old_node_info.offline and not self.early_release:
9463           # doesn't make sense to delay the release
9464           self.early_release = True
9465           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9466                           " early-release mode", secondary_node)
9467
9468       else:
9469         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9470                                      self.mode)
9471
9472       # If not specified all disks should be replaced
9473       if not self.disks:
9474         self.disks = range(len(self.instance.disks))
9475
9476     for node in check_nodes:
9477       _CheckNodeOnline(self.lu, node)
9478
9479     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9480                                                           self.other_node,
9481                                                           self.target_node]
9482                               if node_name is not None)
9483
9484     # Release unneeded node locks
9485     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9486
9487     # Release any owned node group
9488     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9489       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9490
9491     # Check whether disks are valid
9492     for disk_idx in self.disks:
9493       instance.FindDisk(disk_idx)
9494
9495     # Get secondary node IP addresses
9496     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9497                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
9498
9499   def Exec(self, feedback_fn):
9500     """Execute disk replacement.
9501
9502     This dispatches the disk replacement to the appropriate handler.
9503
9504     """
9505     if self.delay_iallocator:
9506       self._CheckPrereq2()
9507
9508     if __debug__:
9509       # Verify owned locks before starting operation
9510       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9511       assert set(owned_nodes) == set(self.node_secondary_ip), \
9512           ("Incorrect node locks, owning %s, expected %s" %
9513            (owned_nodes, self.node_secondary_ip.keys()))
9514
9515       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9516       assert list(owned_instances) == [self.instance_name], \
9517           "Instance '%s' not locked" % self.instance_name
9518
9519       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9520           "Should not own any node group lock at this point"
9521
9522     if not self.disks:
9523       feedback_fn("No disks need replacement")
9524       return
9525
9526     feedback_fn("Replacing disk(s) %s for %s" %
9527                 (utils.CommaJoin(self.disks), self.instance.name))
9528
9529     activate_disks = (not self.instance.admin_up)
9530
9531     # Activate the instance disks if we're replacing them on a down instance
9532     if activate_disks:
9533       _StartInstanceDisks(self.lu, self.instance, True)
9534
9535     try:
9536       # Should we replace the secondary node?
9537       if self.new_node is not None:
9538         fn = self._ExecDrbd8Secondary
9539       else:
9540         fn = self._ExecDrbd8DiskOnly
9541
9542       result = fn(feedback_fn)
9543     finally:
9544       # Deactivate the instance disks if we're replacing them on a
9545       # down instance
9546       if activate_disks:
9547         _SafeShutdownInstanceDisks(self.lu, self.instance)
9548
9549     if __debug__:
9550       # Verify owned locks
9551       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9552       nodes = frozenset(self.node_secondary_ip)
9553       assert ((self.early_release and not owned_nodes) or
9554               (not self.early_release and not (set(owned_nodes) - nodes))), \
9555         ("Not owning the correct locks, early_release=%s, owned=%r,"
9556          " nodes=%r" % (self.early_release, owned_nodes, nodes))
9557
9558     return result
9559
9560   def _CheckVolumeGroup(self, nodes):
9561     self.lu.LogInfo("Checking volume groups")
9562
9563     vgname = self.cfg.GetVGName()
9564
9565     # Make sure volume group exists on all involved nodes
9566     results = self.rpc.call_vg_list(nodes)
9567     if not results:
9568       raise errors.OpExecError("Can't list volume groups on the nodes")
9569
9570     for node in nodes:
9571       res = results[node]
9572       res.Raise("Error checking node %s" % node)
9573       if vgname not in res.payload:
9574         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9575                                  (vgname, node))
9576
9577   def _CheckDisksExistence(self, nodes):
9578     # Check disk existence
9579     for idx, dev in enumerate(self.instance.disks):
9580       if idx not in self.disks:
9581         continue
9582
9583       for node in nodes:
9584         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9585         self.cfg.SetDiskID(dev, node)
9586
9587         result = self.rpc.call_blockdev_find(node, dev)
9588
9589         msg = result.fail_msg
9590         if msg or not result.payload:
9591           if not msg:
9592             msg = "disk not found"
9593           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9594                                    (idx, node, msg))
9595
9596   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9597     for idx, dev in enumerate(self.instance.disks):
9598       if idx not in self.disks:
9599         continue
9600
9601       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9602                       (idx, node_name))
9603
9604       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9605                                    ldisk=ldisk):
9606         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9607                                  " replace disks for instance %s" %
9608                                  (node_name, self.instance.name))
9609
9610   def _CreateNewStorage(self, node_name):
9611     """Create new storage on the primary or secondary node.
9612
9613     This is only used for same-node replaces, not for changing the
9614     secondary node, hence we don't want to modify the existing disk.
9615
9616     """
9617     iv_names = {}
9618
9619     for idx, dev in enumerate(self.instance.disks):
9620       if idx not in self.disks:
9621         continue
9622
9623       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9624
9625       self.cfg.SetDiskID(dev, node_name)
9626
9627       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9628       names = _GenerateUniqueNames(self.lu, lv_names)
9629
9630       vg_data = dev.children[0].logical_id[0]
9631       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9632                              logical_id=(vg_data, names[0]))
9633       vg_meta = dev.children[1].logical_id[0]
9634       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9635                              logical_id=(vg_meta, names[1]))
9636
9637       new_lvs = [lv_data, lv_meta]
9638       old_lvs = [child.Copy() for child in dev.children]
9639       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9640
9641       # we pass force_create=True to force the LVM creation
9642       for new_lv in new_lvs:
9643         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9644                         _GetInstanceInfoText(self.instance), False)
9645
9646     return iv_names
9647
9648   def _CheckDevices(self, node_name, iv_names):
9649     for name, (dev, _, _) in iv_names.iteritems():
9650       self.cfg.SetDiskID(dev, node_name)
9651
9652       result = self.rpc.call_blockdev_find(node_name, dev)
9653
9654       msg = result.fail_msg
9655       if msg or not result.payload:
9656         if not msg:
9657           msg = "disk not found"
9658         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9659                                  (name, msg))
9660
9661       if result.payload.is_degraded:
9662         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9663
9664   def _RemoveOldStorage(self, node_name, iv_names):
9665     for name, (_, old_lvs, _) in iv_names.iteritems():
9666       self.lu.LogInfo("Remove logical volumes for %s" % name)
9667
9668       for lv in old_lvs:
9669         self.cfg.SetDiskID(lv, node_name)
9670
9671         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9672         if msg:
9673           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9674                              hint="remove unused LVs manually")
9675
9676   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9677     """Replace a disk on the primary or secondary for DRBD 8.
9678
9679     The algorithm for replace is quite complicated:
9680
9681       1. for each disk to be replaced:
9682
9683         1. create new LVs on the target node with unique names
9684         1. detach old LVs from the drbd device
9685         1. rename old LVs to name_replaced.<time_t>
9686         1. rename new LVs to old LVs
9687         1. attach the new LVs (with the old names now) to the drbd device
9688
9689       1. wait for sync across all devices
9690
9691       1. for each modified disk:
9692
9693         1. remove old LVs (which have the name name_replaces.<time_t>)
9694
9695     Failures are not very well handled.
9696
9697     """
9698     steps_total = 6
9699
9700     # Step: check device activation
9701     self.lu.LogStep(1, steps_total, "Check device existence")
9702     self._CheckDisksExistence([self.other_node, self.target_node])
9703     self._CheckVolumeGroup([self.target_node, self.other_node])
9704
9705     # Step: check other node consistency
9706     self.lu.LogStep(2, steps_total, "Check peer consistency")
9707     self._CheckDisksConsistency(self.other_node,
9708                                 self.other_node == self.instance.primary_node,
9709                                 False)
9710
9711     # Step: create new storage
9712     self.lu.LogStep(3, steps_total, "Allocate new storage")
9713     iv_names = self._CreateNewStorage(self.target_node)
9714
9715     # Step: for each lv, detach+rename*2+attach
9716     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9717     for dev, old_lvs, new_lvs in iv_names.itervalues():
9718       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9719
9720       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9721                                                      old_lvs)
9722       result.Raise("Can't detach drbd from local storage on node"
9723                    " %s for device %s" % (self.target_node, dev.iv_name))
9724       #dev.children = []
9725       #cfg.Update(instance)
9726
9727       # ok, we created the new LVs, so now we know we have the needed
9728       # storage; as such, we proceed on the target node to rename
9729       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9730       # using the assumption that logical_id == physical_id (which in
9731       # turn is the unique_id on that node)
9732
9733       # FIXME(iustin): use a better name for the replaced LVs
9734       temp_suffix = int(time.time())
9735       ren_fn = lambda d, suff: (d.physical_id[0],
9736                                 d.physical_id[1] + "_replaced-%s" % suff)
9737
9738       # Build the rename list based on what LVs exist on the node
9739       rename_old_to_new = []
9740       for to_ren in old_lvs:
9741         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9742         if not result.fail_msg and result.payload:
9743           # device exists
9744           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9745
9746       self.lu.LogInfo("Renaming the old LVs on the target node")
9747       result = self.rpc.call_blockdev_rename(self.target_node,
9748                                              rename_old_to_new)
9749       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9750
9751       # Now we rename the new LVs to the old LVs
9752       self.lu.LogInfo("Renaming the new LVs on the target node")
9753       rename_new_to_old = [(new, old.physical_id)
9754                            for old, new in zip(old_lvs, new_lvs)]
9755       result = self.rpc.call_blockdev_rename(self.target_node,
9756                                              rename_new_to_old)
9757       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9758
9759       # Intermediate steps of in memory modifications
9760       for old, new in zip(old_lvs, new_lvs):
9761         new.logical_id = old.logical_id
9762         self.cfg.SetDiskID(new, self.target_node)
9763
9764       # We need to modify old_lvs so that removal later removes the
9765       # right LVs, not the newly added ones; note that old_lvs is a
9766       # copy here
9767       for disk in old_lvs:
9768         disk.logical_id = ren_fn(disk, temp_suffix)
9769         self.cfg.SetDiskID(disk, self.target_node)
9770
9771       # Now that the new lvs have the old name, we can add them to the device
9772       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9773       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9774                                                   new_lvs)
9775       msg = result.fail_msg
9776       if msg:
9777         for new_lv in new_lvs:
9778           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9779                                                new_lv).fail_msg
9780           if msg2:
9781             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9782                                hint=("cleanup manually the unused logical"
9783                                      "volumes"))
9784         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9785
9786     cstep = 5
9787     if self.early_release:
9788       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9789       cstep += 1
9790       self._RemoveOldStorage(self.target_node, iv_names)
9791       # WARNING: we release both node locks here, do not do other RPCs
9792       # than WaitForSync to the primary node
9793       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9794                     names=[self.target_node, self.other_node])
9795
9796     # Wait for sync
9797     # This can fail as the old devices are degraded and _WaitForSync
9798     # does a combined result over all disks, so we don't check its return value
9799     self.lu.LogStep(cstep, steps_total, "Sync devices")
9800     cstep += 1
9801     _WaitForSync(self.lu, self.instance)
9802
9803     # Check all devices manually
9804     self._CheckDevices(self.instance.primary_node, iv_names)
9805
9806     # Step: remove old storage
9807     if not self.early_release:
9808       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9809       cstep += 1
9810       self._RemoveOldStorage(self.target_node, iv_names)
9811
9812   def _ExecDrbd8Secondary(self, feedback_fn):
9813     """Replace the secondary node for DRBD 8.
9814
9815     The algorithm for replace is quite complicated:
9816       - for all disks of the instance:
9817         - create new LVs on the new node with same names
9818         - shutdown the drbd device on the old secondary
9819         - disconnect the drbd network on the primary
9820         - create the drbd device on the new secondary
9821         - network attach the drbd on the primary, using an artifice:
9822           the drbd code for Attach() will connect to the network if it
9823           finds a device which is connected to the good local disks but
9824           not network enabled
9825       - wait for sync across all devices
9826       - remove all disks from the old secondary
9827
9828     Failures are not very well handled.
9829
9830     """
9831     steps_total = 6
9832
9833     # Step: check device activation
9834     self.lu.LogStep(1, steps_total, "Check device existence")
9835     self._CheckDisksExistence([self.instance.primary_node])
9836     self._CheckVolumeGroup([self.instance.primary_node])
9837
9838     # Step: check other node consistency
9839     self.lu.LogStep(2, steps_total, "Check peer consistency")
9840     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9841
9842     # Step: create new storage
9843     self.lu.LogStep(3, steps_total, "Allocate new storage")
9844     for idx, dev in enumerate(self.instance.disks):
9845       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9846                       (self.new_node, idx))
9847       # we pass force_create=True to force LVM creation
9848       for new_lv in dev.children:
9849         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9850                         _GetInstanceInfoText(self.instance), False)
9851
9852     # Step 4: dbrd minors and drbd setups changes
9853     # after this, we must manually remove the drbd minors on both the
9854     # error and the success paths
9855     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9856     minors = self.cfg.AllocateDRBDMinor([self.new_node
9857                                          for dev in self.instance.disks],
9858                                         self.instance.name)
9859     logging.debug("Allocated minors %r", minors)
9860
9861     iv_names = {}
9862     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9863       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9864                       (self.new_node, idx))
9865       # create new devices on new_node; note that we create two IDs:
9866       # one without port, so the drbd will be activated without
9867       # networking information on the new node at this stage, and one
9868       # with network, for the latter activation in step 4
9869       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9870       if self.instance.primary_node == o_node1:
9871         p_minor = o_minor1
9872       else:
9873         assert self.instance.primary_node == o_node2, "Three-node instance?"
9874         p_minor = o_minor2
9875
9876       new_alone_id = (self.instance.primary_node, self.new_node, None,
9877                       p_minor, new_minor, o_secret)
9878       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9879                     p_minor, new_minor, o_secret)
9880
9881       iv_names[idx] = (dev, dev.children, new_net_id)
9882       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9883                     new_net_id)
9884       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9885                               logical_id=new_alone_id,
9886                               children=dev.children,
9887                               size=dev.size)
9888       try:
9889         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9890                               _GetInstanceInfoText(self.instance), False)
9891       except errors.GenericError:
9892         self.cfg.ReleaseDRBDMinors(self.instance.name)
9893         raise
9894
9895     # We have new devices, shutdown the drbd on the old secondary
9896     for idx, dev in enumerate(self.instance.disks):
9897       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9898       self.cfg.SetDiskID(dev, self.target_node)
9899       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9900       if msg:
9901         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9902                            "node: %s" % (idx, msg),
9903                            hint=("Please cleanup this device manually as"
9904                                  " soon as possible"))
9905
9906     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9907     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9908                                                self.node_secondary_ip,
9909                                                self.instance.disks)\
9910                                               [self.instance.primary_node]
9911
9912     msg = result.fail_msg
9913     if msg:
9914       # detaches didn't succeed (unlikely)
9915       self.cfg.ReleaseDRBDMinors(self.instance.name)
9916       raise errors.OpExecError("Can't detach the disks from the network on"
9917                                " old node: %s" % (msg,))
9918
9919     # if we managed to detach at least one, we update all the disks of
9920     # the instance to point to the new secondary
9921     self.lu.LogInfo("Updating instance configuration")
9922     for dev, _, new_logical_id in iv_names.itervalues():
9923       dev.logical_id = new_logical_id
9924       self.cfg.SetDiskID(dev, self.instance.primary_node)
9925
9926     self.cfg.Update(self.instance, feedback_fn)
9927
9928     # and now perform the drbd attach
9929     self.lu.LogInfo("Attaching primary drbds to new secondary"
9930                     " (standalone => connected)")
9931     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9932                                             self.new_node],
9933                                            self.node_secondary_ip,
9934                                            self.instance.disks,
9935                                            self.instance.name,
9936                                            False)
9937     for to_node, to_result in result.items():
9938       msg = to_result.fail_msg
9939       if msg:
9940         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9941                            to_node, msg,
9942                            hint=("please do a gnt-instance info to see the"
9943                                  " status of disks"))
9944     cstep = 5
9945     if self.early_release:
9946       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9947       cstep += 1
9948       self._RemoveOldStorage(self.target_node, iv_names)
9949       # WARNING: we release all node locks here, do not do other RPCs
9950       # than WaitForSync to the primary node
9951       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9952                     names=[self.instance.primary_node,
9953                            self.target_node,
9954                            self.new_node])
9955
9956     # Wait for sync
9957     # This can fail as the old devices are degraded and _WaitForSync
9958     # does a combined result over all disks, so we don't check its return value
9959     self.lu.LogStep(cstep, steps_total, "Sync devices")
9960     cstep += 1
9961     _WaitForSync(self.lu, self.instance)
9962
9963     # Check all devices manually
9964     self._CheckDevices(self.instance.primary_node, iv_names)
9965
9966     # Step: remove old storage
9967     if not self.early_release:
9968       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9969       self._RemoveOldStorage(self.target_node, iv_names)
9970
9971
9972 class LURepairNodeStorage(NoHooksLU):
9973   """Repairs the volume group on a node.
9974
9975   """
9976   REQ_BGL = False
9977
9978   def CheckArguments(self):
9979     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9980
9981     storage_type = self.op.storage_type
9982
9983     if (constants.SO_FIX_CONSISTENCY not in
9984         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9985       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9986                                  " repaired" % storage_type,
9987                                  errors.ECODE_INVAL)
9988
9989   def ExpandNames(self):
9990     self.needed_locks = {
9991       locking.LEVEL_NODE: [self.op.node_name],
9992       }
9993
9994   def _CheckFaultyDisks(self, instance, node_name):
9995     """Ensure faulty disks abort the opcode or at least warn."""
9996     try:
9997       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9998                                   node_name, True):
9999         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10000                                    " node '%s'" % (instance.name, node_name),
10001                                    errors.ECODE_STATE)
10002     except errors.OpPrereqError, err:
10003       if self.op.ignore_consistency:
10004         self.proc.LogWarning(str(err.args[0]))
10005       else:
10006         raise
10007
10008   def CheckPrereq(self):
10009     """Check prerequisites.
10010
10011     """
10012     # Check whether any instance on this node has faulty disks
10013     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10014       if not inst.admin_up:
10015         continue
10016       check_nodes = set(inst.all_nodes)
10017       check_nodes.discard(self.op.node_name)
10018       for inst_node_name in check_nodes:
10019         self._CheckFaultyDisks(inst, inst_node_name)
10020
10021   def Exec(self, feedback_fn):
10022     feedback_fn("Repairing storage unit '%s' on %s ..." %
10023                 (self.op.name, self.op.node_name))
10024
10025     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10026     result = self.rpc.call_storage_execute(self.op.node_name,
10027                                            self.op.storage_type, st_args,
10028                                            self.op.name,
10029                                            constants.SO_FIX_CONSISTENCY)
10030     result.Raise("Failed to repair storage unit '%s' on %s" %
10031                  (self.op.name, self.op.node_name))
10032
10033
10034 class LUNodeEvacuate(NoHooksLU):
10035   """Evacuates instances off a list of nodes.
10036
10037   """
10038   REQ_BGL = False
10039
10040   def CheckArguments(self):
10041     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10042
10043   def ExpandNames(self):
10044     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10045
10046     if self.op.remote_node is not None:
10047       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10048       assert self.op.remote_node
10049
10050       if self.op.remote_node == self.op.node_name:
10051         raise errors.OpPrereqError("Can not use evacuated node as a new"
10052                                    " secondary node", errors.ECODE_INVAL)
10053
10054       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10055         raise errors.OpPrereqError("Without the use of an iallocator only"
10056                                    " secondary instances can be evacuated",
10057                                    errors.ECODE_INVAL)
10058
10059     # Declare locks
10060     self.share_locks = _ShareAll()
10061     self.needed_locks = {
10062       locking.LEVEL_INSTANCE: [],
10063       locking.LEVEL_NODEGROUP: [],
10064       locking.LEVEL_NODE: [],
10065       }
10066
10067     if self.op.remote_node is None:
10068       # Iallocator will choose any node(s) in the same group
10069       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10070     else:
10071       group_nodes = frozenset([self.op.remote_node])
10072
10073     # Determine nodes to be locked
10074     self.lock_nodes = set([self.op.node_name]) | group_nodes
10075
10076   def _DetermineInstances(self):
10077     """Builds list of instances to operate on.
10078
10079     """
10080     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10081
10082     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10083       # Primary instances only
10084       inst_fn = _GetNodePrimaryInstances
10085       assert self.op.remote_node is None, \
10086         "Evacuating primary instances requires iallocator"
10087     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10088       # Secondary instances only
10089       inst_fn = _GetNodeSecondaryInstances
10090     else:
10091       # All instances
10092       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10093       inst_fn = _GetNodeInstances
10094
10095     return inst_fn(self.cfg, self.op.node_name)
10096
10097   def DeclareLocks(self, level):
10098     if level == locking.LEVEL_INSTANCE:
10099       # Lock instances optimistically, needs verification once node and group
10100       # locks have been acquired
10101       self.needed_locks[locking.LEVEL_INSTANCE] = \
10102         set(i.name for i in self._DetermineInstances())
10103
10104     elif level == locking.LEVEL_NODEGROUP:
10105       # Lock node groups optimistically, needs verification once nodes have
10106       # been acquired
10107       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10108         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10109
10110     elif level == locking.LEVEL_NODE:
10111       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10112
10113   def CheckPrereq(self):
10114     # Verify locks
10115     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10116     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10117     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10118
10119     assert owned_nodes == self.lock_nodes
10120
10121     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10122     if owned_groups != wanted_groups:
10123       raise errors.OpExecError("Node groups changed since locks were acquired,"
10124                                " current groups are '%s', used to be '%s'" %
10125                                (utils.CommaJoin(wanted_groups),
10126                                 utils.CommaJoin(owned_groups)))
10127
10128     # Determine affected instances
10129     self.instances = self._DetermineInstances()
10130     self.instance_names = [i.name for i in self.instances]
10131
10132     if set(self.instance_names) != owned_instances:
10133       raise errors.OpExecError("Instances on node '%s' changed since locks"
10134                                " were acquired, current instances are '%s',"
10135                                " used to be '%s'" %
10136                                (self.op.node_name,
10137                                 utils.CommaJoin(self.instance_names),
10138                                 utils.CommaJoin(owned_instances)))
10139
10140     if self.instance_names:
10141       self.LogInfo("Evacuating instances from node '%s': %s",
10142                    self.op.node_name,
10143                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10144     else:
10145       self.LogInfo("No instances to evacuate from node '%s'",
10146                    self.op.node_name)
10147
10148     if self.op.remote_node is not None:
10149       for i in self.instances:
10150         if i.primary_node == self.op.remote_node:
10151           raise errors.OpPrereqError("Node %s is the primary node of"
10152                                      " instance %s, cannot use it as"
10153                                      " secondary" %
10154                                      (self.op.remote_node, i.name),
10155                                      errors.ECODE_INVAL)
10156
10157   def Exec(self, feedback_fn):
10158     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10159
10160     if not self.instance_names:
10161       # No instances to evacuate
10162       jobs = []
10163
10164     elif self.op.iallocator is not None:
10165       # TODO: Implement relocation to other group
10166       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10167                        evac_mode=self.op.mode,
10168                        instances=list(self.instance_names))
10169
10170       ial.Run(self.op.iallocator)
10171
10172       if not ial.success:
10173         raise errors.OpPrereqError("Can't compute node evacuation using"
10174                                    " iallocator '%s': %s" %
10175                                    (self.op.iallocator, ial.info),
10176                                    errors.ECODE_NORES)
10177
10178       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10179
10180     elif self.op.remote_node is not None:
10181       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10182       jobs = [
10183         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10184                                         remote_node=self.op.remote_node,
10185                                         disks=[],
10186                                         mode=constants.REPLACE_DISK_CHG,
10187                                         early_release=self.op.early_release)]
10188         for instance_name in self.instance_names
10189         ]
10190
10191     else:
10192       raise errors.ProgrammerError("No iallocator or remote node")
10193
10194     return ResultWithJobs(jobs)
10195
10196
10197 def _SetOpEarlyRelease(early_release, op):
10198   """Sets C{early_release} flag on opcodes if available.
10199
10200   """
10201   try:
10202     op.early_release = early_release
10203   except AttributeError:
10204     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10205
10206   return op
10207
10208
10209 def _NodeEvacDest(use_nodes, group, nodes):
10210   """Returns group or nodes depending on caller's choice.
10211
10212   """
10213   if use_nodes:
10214     return utils.CommaJoin(nodes)
10215   else:
10216     return group
10217
10218
10219 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10220   """Unpacks the result of change-group and node-evacuate iallocator requests.
10221
10222   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10223   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10224
10225   @type lu: L{LogicalUnit}
10226   @param lu: Logical unit instance
10227   @type alloc_result: tuple/list
10228   @param alloc_result: Result from iallocator
10229   @type early_release: bool
10230   @param early_release: Whether to release locks early if possible
10231   @type use_nodes: bool
10232   @param use_nodes: Whether to display node names instead of groups
10233
10234   """
10235   (moved, failed, jobs) = alloc_result
10236
10237   if failed:
10238     lu.LogWarning("Unable to evacuate instances %s",
10239                   utils.CommaJoin("%s (%s)" % (name, reason)
10240                                   for (name, reason) in failed))
10241
10242   if moved:
10243     lu.LogInfo("Instances to be moved: %s",
10244                utils.CommaJoin("%s (to %s)" %
10245                                (name, _NodeEvacDest(use_nodes, group, nodes))
10246                                for (name, group, nodes) in moved))
10247
10248   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10249               map(opcodes.OpCode.LoadOpCode, ops))
10250           for ops in jobs]
10251
10252
10253 class LUInstanceGrowDisk(LogicalUnit):
10254   """Grow a disk of an instance.
10255
10256   """
10257   HPATH = "disk-grow"
10258   HTYPE = constants.HTYPE_INSTANCE
10259   REQ_BGL = False
10260
10261   def ExpandNames(self):
10262     self._ExpandAndLockInstance()
10263     self.needed_locks[locking.LEVEL_NODE] = []
10264     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10265
10266   def DeclareLocks(self, level):
10267     if level == locking.LEVEL_NODE:
10268       self._LockInstancesNodes()
10269
10270   def BuildHooksEnv(self):
10271     """Build hooks env.
10272
10273     This runs on the master, the primary and all the secondaries.
10274
10275     """
10276     env = {
10277       "DISK": self.op.disk,
10278       "AMOUNT": self.op.amount,
10279       }
10280     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10281     return env
10282
10283   def BuildHooksNodes(self):
10284     """Build hooks nodes.
10285
10286     """
10287     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10288     return (nl, nl)
10289
10290   def CheckPrereq(self):
10291     """Check prerequisites.
10292
10293     This checks that the instance is in the cluster.
10294
10295     """
10296     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10297     assert instance is not None, \
10298       "Cannot retrieve locked instance %s" % self.op.instance_name
10299     nodenames = list(instance.all_nodes)
10300     for node in nodenames:
10301       _CheckNodeOnline(self, node)
10302
10303     self.instance = instance
10304
10305     if instance.disk_template not in constants.DTS_GROWABLE:
10306       raise errors.OpPrereqError("Instance's disk layout does not support"
10307                                  " growing", errors.ECODE_INVAL)
10308
10309     self.disk = instance.FindDisk(self.op.disk)
10310
10311     if instance.disk_template not in (constants.DT_FILE,
10312                                       constants.DT_SHARED_FILE):
10313       # TODO: check the free disk space for file, when that feature will be
10314       # supported
10315       _CheckNodesFreeDiskPerVG(self, nodenames,
10316                                self.disk.ComputeGrowth(self.op.amount))
10317
10318   def Exec(self, feedback_fn):
10319     """Execute disk grow.
10320
10321     """
10322     instance = self.instance
10323     disk = self.disk
10324
10325     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10326     if not disks_ok:
10327       raise errors.OpExecError("Cannot activate block device to grow")
10328
10329     # First run all grow ops in dry-run mode
10330     for node in instance.all_nodes:
10331       self.cfg.SetDiskID(disk, node)
10332       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10333       result.Raise("Grow request failed to node %s" % node)
10334
10335     # We know that (as far as we can test) operations across different
10336     # nodes will succeed, time to run it for real
10337     for node in instance.all_nodes:
10338       self.cfg.SetDiskID(disk, node)
10339       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10340       result.Raise("Grow request failed to node %s" % node)
10341
10342       # TODO: Rewrite code to work properly
10343       # DRBD goes into sync mode for a short amount of time after executing the
10344       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10345       # calling "resize" in sync mode fails. Sleeping for a short amount of
10346       # time is a work-around.
10347       time.sleep(5)
10348
10349     disk.RecordGrow(self.op.amount)
10350     self.cfg.Update(instance, feedback_fn)
10351     if self.op.wait_for_sync:
10352       disk_abort = not _WaitForSync(self, instance, disks=[disk])
10353       if disk_abort:
10354         self.proc.LogWarning("Disk sync-ing has not returned a good"
10355                              " status; please check the instance")
10356       if not instance.admin_up:
10357         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10358     elif not instance.admin_up:
10359       self.proc.LogWarning("Not shutting down the disk even if the instance is"
10360                            " not supposed to be running because no wait for"
10361                            " sync mode was requested")
10362
10363
10364 class LUInstanceQueryData(NoHooksLU):
10365   """Query runtime instance data.
10366
10367   """
10368   REQ_BGL = False
10369
10370   def ExpandNames(self):
10371     self.needed_locks = {}
10372
10373     # Use locking if requested or when non-static information is wanted
10374     if not (self.op.static or self.op.use_locking):
10375       self.LogWarning("Non-static data requested, locks need to be acquired")
10376       self.op.use_locking = True
10377
10378     if self.op.instances or not self.op.use_locking:
10379       # Expand instance names right here
10380       self.wanted_names = _GetWantedInstances(self, self.op.instances)
10381     else:
10382       # Will use acquired locks
10383       self.wanted_names = None
10384
10385     if self.op.use_locking:
10386       self.share_locks = _ShareAll()
10387
10388       if self.wanted_names is None:
10389         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10390       else:
10391         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10392
10393       self.needed_locks[locking.LEVEL_NODE] = []
10394       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10395
10396   def DeclareLocks(self, level):
10397     if self.op.use_locking and level == locking.LEVEL_NODE:
10398       self._LockInstancesNodes()
10399
10400   def CheckPrereq(self):
10401     """Check prerequisites.
10402
10403     This only checks the optional instance list against the existing names.
10404
10405     """
10406     if self.wanted_names is None:
10407       assert self.op.use_locking, "Locking was not used"
10408       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10409
10410     self.wanted_instances = \
10411         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10412
10413   def _ComputeBlockdevStatus(self, node, instance_name, dev):
10414     """Returns the status of a block device
10415
10416     """
10417     if self.op.static or not node:
10418       return None
10419
10420     self.cfg.SetDiskID(dev, node)
10421
10422     result = self.rpc.call_blockdev_find(node, dev)
10423     if result.offline:
10424       return None
10425
10426     result.Raise("Can't compute disk status for %s" % instance_name)
10427
10428     status = result.payload
10429     if status is None:
10430       return None
10431
10432     return (status.dev_path, status.major, status.minor,
10433             status.sync_percent, status.estimated_time,
10434             status.is_degraded, status.ldisk_status)
10435
10436   def _ComputeDiskStatus(self, instance, snode, dev):
10437     """Compute block device status.
10438
10439     """
10440     if dev.dev_type in constants.LDS_DRBD:
10441       # we change the snode then (otherwise we use the one passed in)
10442       if dev.logical_id[0] == instance.primary_node:
10443         snode = dev.logical_id[1]
10444       else:
10445         snode = dev.logical_id[0]
10446
10447     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10448                                               instance.name, dev)
10449     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10450
10451     if dev.children:
10452       dev_children = map(compat.partial(self._ComputeDiskStatus,
10453                                         instance, snode),
10454                          dev.children)
10455     else:
10456       dev_children = []
10457
10458     return {
10459       "iv_name": dev.iv_name,
10460       "dev_type": dev.dev_type,
10461       "logical_id": dev.logical_id,
10462       "physical_id": dev.physical_id,
10463       "pstatus": dev_pstatus,
10464       "sstatus": dev_sstatus,
10465       "children": dev_children,
10466       "mode": dev.mode,
10467       "size": dev.size,
10468       }
10469
10470   def Exec(self, feedback_fn):
10471     """Gather and return data"""
10472     result = {}
10473
10474     cluster = self.cfg.GetClusterInfo()
10475
10476     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10477                                           for i in self.wanted_instances)
10478     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10479       if self.op.static or pnode.offline:
10480         remote_state = None
10481         if pnode.offline:
10482           self.LogWarning("Primary node %s is marked offline, returning static"
10483                           " information only for instance %s" %
10484                           (pnode.name, instance.name))
10485       else:
10486         remote_info = self.rpc.call_instance_info(instance.primary_node,
10487                                                   instance.name,
10488                                                   instance.hypervisor)
10489         remote_info.Raise("Error checking node %s" % instance.primary_node)
10490         remote_info = remote_info.payload
10491         if remote_info and "state" in remote_info:
10492           remote_state = "up"
10493         else:
10494           remote_state = "down"
10495
10496       if instance.admin_up:
10497         config_state = "up"
10498       else:
10499         config_state = "down"
10500
10501       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10502                   instance.disks)
10503
10504       result[instance.name] = {
10505         "name": instance.name,
10506         "config_state": config_state,
10507         "run_state": remote_state,
10508         "pnode": instance.primary_node,
10509         "snodes": instance.secondary_nodes,
10510         "os": instance.os,
10511         # this happens to be the same format used for hooks
10512         "nics": _NICListToTuple(self, instance.nics),
10513         "disk_template": instance.disk_template,
10514         "disks": disks,
10515         "hypervisor": instance.hypervisor,
10516         "network_port": instance.network_port,
10517         "hv_instance": instance.hvparams,
10518         "hv_actual": cluster.FillHV(instance, skip_globals=True),
10519         "be_instance": instance.beparams,
10520         "be_actual": cluster.FillBE(instance),
10521         "os_instance": instance.osparams,
10522         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10523         "serial_no": instance.serial_no,
10524         "mtime": instance.mtime,
10525         "ctime": instance.ctime,
10526         "uuid": instance.uuid,
10527         }
10528
10529     return result
10530
10531
10532 class LUInstanceSetParams(LogicalUnit):
10533   """Modifies an instances's parameters.
10534
10535   """
10536   HPATH = "instance-modify"
10537   HTYPE = constants.HTYPE_INSTANCE
10538   REQ_BGL = False
10539
10540   def CheckArguments(self):
10541     if not (self.op.nics or self.op.disks or self.op.disk_template or
10542             self.op.hvparams or self.op.beparams or self.op.os_name):
10543       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10544
10545     if self.op.hvparams:
10546       _CheckGlobalHvParams(self.op.hvparams)
10547
10548     # Disk validation
10549     disk_addremove = 0
10550     for disk_op, disk_dict in self.op.disks:
10551       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10552       if disk_op == constants.DDM_REMOVE:
10553         disk_addremove += 1
10554         continue
10555       elif disk_op == constants.DDM_ADD:
10556         disk_addremove += 1
10557       else:
10558         if not isinstance(disk_op, int):
10559           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10560         if not isinstance(disk_dict, dict):
10561           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10562           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10563
10564       if disk_op == constants.DDM_ADD:
10565         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10566         if mode not in constants.DISK_ACCESS_SET:
10567           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10568                                      errors.ECODE_INVAL)
10569         size = disk_dict.get(constants.IDISK_SIZE, None)
10570         if size is None:
10571           raise errors.OpPrereqError("Required disk parameter size missing",
10572                                      errors.ECODE_INVAL)
10573         try:
10574           size = int(size)
10575         except (TypeError, ValueError), err:
10576           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10577                                      str(err), errors.ECODE_INVAL)
10578         disk_dict[constants.IDISK_SIZE] = size
10579       else:
10580         # modification of disk
10581         if constants.IDISK_SIZE in disk_dict:
10582           raise errors.OpPrereqError("Disk size change not possible, use"
10583                                      " grow-disk", errors.ECODE_INVAL)
10584
10585     if disk_addremove > 1:
10586       raise errors.OpPrereqError("Only one disk add or remove operation"
10587                                  " supported at a time", errors.ECODE_INVAL)
10588
10589     if self.op.disks and self.op.disk_template is not None:
10590       raise errors.OpPrereqError("Disk template conversion and other disk"
10591                                  " changes not supported at the same time",
10592                                  errors.ECODE_INVAL)
10593
10594     if (self.op.disk_template and
10595         self.op.disk_template in constants.DTS_INT_MIRROR and
10596         self.op.remote_node is None):
10597       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10598                                  " one requires specifying a secondary node",
10599                                  errors.ECODE_INVAL)
10600
10601     # NIC validation
10602     nic_addremove = 0
10603     for nic_op, nic_dict in self.op.nics:
10604       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10605       if nic_op == constants.DDM_REMOVE:
10606         nic_addremove += 1
10607         continue
10608       elif nic_op == constants.DDM_ADD:
10609         nic_addremove += 1
10610       else:
10611         if not isinstance(nic_op, int):
10612           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10613         if not isinstance(nic_dict, dict):
10614           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10615           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10616
10617       # nic_dict should be a dict
10618       nic_ip = nic_dict.get(constants.INIC_IP, None)
10619       if nic_ip is not None:
10620         if nic_ip.lower() == constants.VALUE_NONE:
10621           nic_dict[constants.INIC_IP] = None
10622         else:
10623           if not netutils.IPAddress.IsValid(nic_ip):
10624             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10625                                        errors.ECODE_INVAL)
10626
10627       nic_bridge = nic_dict.get("bridge", None)
10628       nic_link = nic_dict.get(constants.INIC_LINK, None)
10629       if nic_bridge and nic_link:
10630         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10631                                    " at the same time", errors.ECODE_INVAL)
10632       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10633         nic_dict["bridge"] = None
10634       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10635         nic_dict[constants.INIC_LINK] = None
10636
10637       if nic_op == constants.DDM_ADD:
10638         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10639         if nic_mac is None:
10640           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10641
10642       if constants.INIC_MAC in nic_dict:
10643         nic_mac = nic_dict[constants.INIC_MAC]
10644         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10645           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10646
10647         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10648           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10649                                      " modifying an existing nic",
10650                                      errors.ECODE_INVAL)
10651
10652     if nic_addremove > 1:
10653       raise errors.OpPrereqError("Only one NIC add or remove operation"
10654                                  " supported at a time", errors.ECODE_INVAL)
10655
10656   def ExpandNames(self):
10657     self._ExpandAndLockInstance()
10658     self.needed_locks[locking.LEVEL_NODE] = []
10659     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10660
10661   def DeclareLocks(self, level):
10662     if level == locking.LEVEL_NODE:
10663       self._LockInstancesNodes()
10664       if self.op.disk_template and self.op.remote_node:
10665         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10666         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10667
10668   def BuildHooksEnv(self):
10669     """Build hooks env.
10670
10671     This runs on the master, primary and secondaries.
10672
10673     """
10674     args = dict()
10675     if constants.BE_MEMORY in self.be_new:
10676       args["memory"] = self.be_new[constants.BE_MEMORY]
10677     if constants.BE_VCPUS in self.be_new:
10678       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10679     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10680     # information at all.
10681     if self.op.nics:
10682       args["nics"] = []
10683       nic_override = dict(self.op.nics)
10684       for idx, nic in enumerate(self.instance.nics):
10685         if idx in nic_override:
10686           this_nic_override = nic_override[idx]
10687         else:
10688           this_nic_override = {}
10689         if constants.INIC_IP in this_nic_override:
10690           ip = this_nic_override[constants.INIC_IP]
10691         else:
10692           ip = nic.ip
10693         if constants.INIC_MAC in this_nic_override:
10694           mac = this_nic_override[constants.INIC_MAC]
10695         else:
10696           mac = nic.mac
10697         if idx in self.nic_pnew:
10698           nicparams = self.nic_pnew[idx]
10699         else:
10700           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10701         mode = nicparams[constants.NIC_MODE]
10702         link = nicparams[constants.NIC_LINK]
10703         args["nics"].append((ip, mac, mode, link))
10704       if constants.DDM_ADD in nic_override:
10705         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10706         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10707         nicparams = self.nic_pnew[constants.DDM_ADD]
10708         mode = nicparams[constants.NIC_MODE]
10709         link = nicparams[constants.NIC_LINK]
10710         args["nics"].append((ip, mac, mode, link))
10711       elif constants.DDM_REMOVE in nic_override:
10712         del args["nics"][-1]
10713
10714     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10715     if self.op.disk_template:
10716       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10717
10718     return env
10719
10720   def BuildHooksNodes(self):
10721     """Build hooks nodes.
10722
10723     """
10724     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10725     return (nl, nl)
10726
10727   def CheckPrereq(self):
10728     """Check prerequisites.
10729
10730     This only checks the instance list against the existing names.
10731
10732     """
10733     # checking the new params on the primary/secondary nodes
10734
10735     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10736     cluster = self.cluster = self.cfg.GetClusterInfo()
10737     assert self.instance is not None, \
10738       "Cannot retrieve locked instance %s" % self.op.instance_name
10739     pnode = instance.primary_node
10740     nodelist = list(instance.all_nodes)
10741
10742     # OS change
10743     if self.op.os_name and not self.op.force:
10744       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10745                       self.op.force_variant)
10746       instance_os = self.op.os_name
10747     else:
10748       instance_os = instance.os
10749
10750     if self.op.disk_template:
10751       if instance.disk_template == self.op.disk_template:
10752         raise errors.OpPrereqError("Instance already has disk template %s" %
10753                                    instance.disk_template, errors.ECODE_INVAL)
10754
10755       if (instance.disk_template,
10756           self.op.disk_template) not in self._DISK_CONVERSIONS:
10757         raise errors.OpPrereqError("Unsupported disk template conversion from"
10758                                    " %s to %s" % (instance.disk_template,
10759                                                   self.op.disk_template),
10760                                    errors.ECODE_INVAL)
10761       _CheckInstanceDown(self, instance, "cannot change disk template")
10762       if self.op.disk_template in constants.DTS_INT_MIRROR:
10763         if self.op.remote_node == pnode:
10764           raise errors.OpPrereqError("Given new secondary node %s is the same"
10765                                      " as the primary node of the instance" %
10766                                      self.op.remote_node, errors.ECODE_STATE)
10767         _CheckNodeOnline(self, self.op.remote_node)
10768         _CheckNodeNotDrained(self, self.op.remote_node)
10769         # FIXME: here we assume that the old instance type is DT_PLAIN
10770         assert instance.disk_template == constants.DT_PLAIN
10771         disks = [{constants.IDISK_SIZE: d.size,
10772                   constants.IDISK_VG: d.logical_id[0]}
10773                  for d in instance.disks]
10774         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10775         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10776
10777     # hvparams processing
10778     if self.op.hvparams:
10779       hv_type = instance.hypervisor
10780       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10781       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10782       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10783
10784       # local check
10785       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10786       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10787       self.hv_new = hv_new # the new actual values
10788       self.hv_inst = i_hvdict # the new dict (without defaults)
10789     else:
10790       self.hv_new = self.hv_inst = {}
10791
10792     # beparams processing
10793     if self.op.beparams:
10794       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10795                                    use_none=True)
10796       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10797       be_new = cluster.SimpleFillBE(i_bedict)
10798       self.be_new = be_new # the new actual values
10799       self.be_inst = i_bedict # the new dict (without defaults)
10800     else:
10801       self.be_new = self.be_inst = {}
10802     be_old = cluster.FillBE(instance)
10803
10804     # osparams processing
10805     if self.op.osparams:
10806       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10807       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10808       self.os_inst = i_osdict # the new dict (without defaults)
10809     else:
10810       self.os_inst = {}
10811
10812     self.warn = []
10813
10814     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10815         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10816       mem_check_list = [pnode]
10817       if be_new[constants.BE_AUTO_BALANCE]:
10818         # either we changed auto_balance to yes or it was from before
10819         mem_check_list.extend(instance.secondary_nodes)
10820       instance_info = self.rpc.call_instance_info(pnode, instance.name,
10821                                                   instance.hypervisor)
10822       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10823                                          instance.hypervisor)
10824       pninfo = nodeinfo[pnode]
10825       msg = pninfo.fail_msg
10826       if msg:
10827         # Assume the primary node is unreachable and go ahead
10828         self.warn.append("Can't get info from primary node %s: %s" %
10829                          (pnode,  msg))
10830       elif not isinstance(pninfo.payload.get("memory_free", None), int):
10831         self.warn.append("Node data from primary node %s doesn't contain"
10832                          " free memory information" % pnode)
10833       elif instance_info.fail_msg:
10834         self.warn.append("Can't get instance runtime information: %s" %
10835                         instance_info.fail_msg)
10836       else:
10837         if instance_info.payload:
10838           current_mem = int(instance_info.payload["memory"])
10839         else:
10840           # Assume instance not running
10841           # (there is a slight race condition here, but it's not very probable,
10842           # and we have no other way to check)
10843           current_mem = 0
10844         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10845                     pninfo.payload["memory_free"])
10846         if miss_mem > 0:
10847           raise errors.OpPrereqError("This change will prevent the instance"
10848                                      " from starting, due to %d MB of memory"
10849                                      " missing on its primary node" % miss_mem,
10850                                      errors.ECODE_NORES)
10851
10852       if be_new[constants.BE_AUTO_BALANCE]:
10853         for node, nres in nodeinfo.items():
10854           if node not in instance.secondary_nodes:
10855             continue
10856           nres.Raise("Can't get info from secondary node %s" % node,
10857                      prereq=True, ecode=errors.ECODE_STATE)
10858           if not isinstance(nres.payload.get("memory_free", None), int):
10859             raise errors.OpPrereqError("Secondary node %s didn't return free"
10860                                        " memory information" % node,
10861                                        errors.ECODE_STATE)
10862           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10863             raise errors.OpPrereqError("This change will prevent the instance"
10864                                        " from failover to its secondary node"
10865                                        " %s, due to not enough memory" % node,
10866                                        errors.ECODE_STATE)
10867
10868     # NIC processing
10869     self.nic_pnew = {}
10870     self.nic_pinst = {}
10871     for nic_op, nic_dict in self.op.nics:
10872       if nic_op == constants.DDM_REMOVE:
10873         if not instance.nics:
10874           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10875                                      errors.ECODE_INVAL)
10876         continue
10877       if nic_op != constants.DDM_ADD:
10878         # an existing nic
10879         if not instance.nics:
10880           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10881                                      " no NICs" % nic_op,
10882                                      errors.ECODE_INVAL)
10883         if nic_op < 0 or nic_op >= len(instance.nics):
10884           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10885                                      " are 0 to %d" %
10886                                      (nic_op, len(instance.nics) - 1),
10887                                      errors.ECODE_INVAL)
10888         old_nic_params = instance.nics[nic_op].nicparams
10889         old_nic_ip = instance.nics[nic_op].ip
10890       else:
10891         old_nic_params = {}
10892         old_nic_ip = None
10893
10894       update_params_dict = dict([(key, nic_dict[key])
10895                                  for key in constants.NICS_PARAMETERS
10896                                  if key in nic_dict])
10897
10898       if "bridge" in nic_dict:
10899         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10900
10901       new_nic_params = _GetUpdatedParams(old_nic_params,
10902                                          update_params_dict)
10903       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10904       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10905       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10906       self.nic_pinst[nic_op] = new_nic_params
10907       self.nic_pnew[nic_op] = new_filled_nic_params
10908       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10909
10910       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10911         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10912         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10913         if msg:
10914           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10915           if self.op.force:
10916             self.warn.append(msg)
10917           else:
10918             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10919       if new_nic_mode == constants.NIC_MODE_ROUTED:
10920         if constants.INIC_IP in nic_dict:
10921           nic_ip = nic_dict[constants.INIC_IP]
10922         else:
10923           nic_ip = old_nic_ip
10924         if nic_ip is None:
10925           raise errors.OpPrereqError("Cannot set the nic ip to None"
10926                                      " on a routed nic", errors.ECODE_INVAL)
10927       if constants.INIC_MAC in nic_dict:
10928         nic_mac = nic_dict[constants.INIC_MAC]
10929         if nic_mac is None:
10930           raise errors.OpPrereqError("Cannot set the nic mac to None",
10931                                      errors.ECODE_INVAL)
10932         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10933           # otherwise generate the mac
10934           nic_dict[constants.INIC_MAC] = \
10935             self.cfg.GenerateMAC(self.proc.GetECId())
10936         else:
10937           # or validate/reserve the current one
10938           try:
10939             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10940           except errors.ReservationError:
10941             raise errors.OpPrereqError("MAC address %s already in use"
10942                                        " in cluster" % nic_mac,
10943                                        errors.ECODE_NOTUNIQUE)
10944
10945     # DISK processing
10946     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10947       raise errors.OpPrereqError("Disk operations not supported for"
10948                                  " diskless instances",
10949                                  errors.ECODE_INVAL)
10950     for disk_op, _ in self.op.disks:
10951       if disk_op == constants.DDM_REMOVE:
10952         if len(instance.disks) == 1:
10953           raise errors.OpPrereqError("Cannot remove the last disk of"
10954                                      " an instance", errors.ECODE_INVAL)
10955         _CheckInstanceDown(self, instance, "cannot remove disks")
10956
10957       if (disk_op == constants.DDM_ADD and
10958           len(instance.disks) >= constants.MAX_DISKS):
10959         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10960                                    " add more" % constants.MAX_DISKS,
10961                                    errors.ECODE_STATE)
10962       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10963         # an existing disk
10964         if disk_op < 0 or disk_op >= len(instance.disks):
10965           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10966                                      " are 0 to %d" %
10967                                      (disk_op, len(instance.disks)),
10968                                      errors.ECODE_INVAL)
10969
10970     return
10971
10972   def _ConvertPlainToDrbd(self, feedback_fn):
10973     """Converts an instance from plain to drbd.
10974
10975     """
10976     feedback_fn("Converting template to drbd")
10977     instance = self.instance
10978     pnode = instance.primary_node
10979     snode = self.op.remote_node
10980
10981     # create a fake disk info for _GenerateDiskTemplate
10982     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10983                   constants.IDISK_VG: d.logical_id[0]}
10984                  for d in instance.disks]
10985     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10986                                       instance.name, pnode, [snode],
10987                                       disk_info, None, None, 0, feedback_fn)
10988     info = _GetInstanceInfoText(instance)
10989     feedback_fn("Creating aditional volumes...")
10990     # first, create the missing data and meta devices
10991     for disk in new_disks:
10992       # unfortunately this is... not too nice
10993       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10994                             info, True)
10995       for child in disk.children:
10996         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10997     # at this stage, all new LVs have been created, we can rename the
10998     # old ones
10999     feedback_fn("Renaming original volumes...")
11000     rename_list = [(o, n.children[0].logical_id)
11001                    for (o, n) in zip(instance.disks, new_disks)]
11002     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11003     result.Raise("Failed to rename original LVs")
11004
11005     feedback_fn("Initializing DRBD devices...")
11006     # all child devices are in place, we can now create the DRBD devices
11007     for disk in new_disks:
11008       for node in [pnode, snode]:
11009         f_create = node == pnode
11010         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11011
11012     # at this point, the instance has been modified
11013     instance.disk_template = constants.DT_DRBD8
11014     instance.disks = new_disks
11015     self.cfg.Update(instance, feedback_fn)
11016
11017     # disks are created, waiting for sync
11018     disk_abort = not _WaitForSync(self, instance,
11019                                   oneshot=not self.op.wait_for_sync)
11020     if disk_abort:
11021       raise errors.OpExecError("There are some degraded disks for"
11022                                " this instance, please cleanup manually")
11023
11024   def _ConvertDrbdToPlain(self, feedback_fn):
11025     """Converts an instance from drbd to plain.
11026
11027     """
11028     instance = self.instance
11029     assert len(instance.secondary_nodes) == 1
11030     pnode = instance.primary_node
11031     snode = instance.secondary_nodes[0]
11032     feedback_fn("Converting template to plain")
11033
11034     old_disks = instance.disks
11035     new_disks = [d.children[0] for d in old_disks]
11036
11037     # copy over size and mode
11038     for parent, child in zip(old_disks, new_disks):
11039       child.size = parent.size
11040       child.mode = parent.mode
11041
11042     # update instance structure
11043     instance.disks = new_disks
11044     instance.disk_template = constants.DT_PLAIN
11045     self.cfg.Update(instance, feedback_fn)
11046
11047     feedback_fn("Removing volumes on the secondary node...")
11048     for disk in old_disks:
11049       self.cfg.SetDiskID(disk, snode)
11050       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11051       if msg:
11052         self.LogWarning("Could not remove block device %s on node %s,"
11053                         " continuing anyway: %s", disk.iv_name, snode, msg)
11054
11055     feedback_fn("Removing unneeded volumes on the primary node...")
11056     for idx, disk in enumerate(old_disks):
11057       meta = disk.children[1]
11058       self.cfg.SetDiskID(meta, pnode)
11059       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11060       if msg:
11061         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11062                         " continuing anyway: %s", idx, pnode, msg)
11063
11064   def Exec(self, feedback_fn):
11065     """Modifies an instance.
11066
11067     All parameters take effect only at the next restart of the instance.
11068
11069     """
11070     # Process here the warnings from CheckPrereq, as we don't have a
11071     # feedback_fn there.
11072     for warn in self.warn:
11073       feedback_fn("WARNING: %s" % warn)
11074
11075     result = []
11076     instance = self.instance
11077     # disk changes
11078     for disk_op, disk_dict in self.op.disks:
11079       if disk_op == constants.DDM_REMOVE:
11080         # remove the last disk
11081         device = instance.disks.pop()
11082         device_idx = len(instance.disks)
11083         for node, disk in device.ComputeNodeTree(instance.primary_node):
11084           self.cfg.SetDiskID(disk, node)
11085           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11086           if msg:
11087             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11088                             " continuing anyway", device_idx, node, msg)
11089         result.append(("disk/%d" % device_idx, "remove"))
11090       elif disk_op == constants.DDM_ADD:
11091         # add a new disk
11092         if instance.disk_template in (constants.DT_FILE,
11093                                         constants.DT_SHARED_FILE):
11094           file_driver, file_path = instance.disks[0].logical_id
11095           file_path = os.path.dirname(file_path)
11096         else:
11097           file_driver = file_path = None
11098         disk_idx_base = len(instance.disks)
11099         new_disk = _GenerateDiskTemplate(self,
11100                                          instance.disk_template,
11101                                          instance.name, instance.primary_node,
11102                                          instance.secondary_nodes,
11103                                          [disk_dict],
11104                                          file_path,
11105                                          file_driver,
11106                                          disk_idx_base, feedback_fn)[0]
11107         instance.disks.append(new_disk)
11108         info = _GetInstanceInfoText(instance)
11109
11110         logging.info("Creating volume %s for instance %s",
11111                      new_disk.iv_name, instance.name)
11112         # Note: this needs to be kept in sync with _CreateDisks
11113         #HARDCODE
11114         for node in instance.all_nodes:
11115           f_create = node == instance.primary_node
11116           try:
11117             _CreateBlockDev(self, node, instance, new_disk,
11118                             f_create, info, f_create)
11119           except errors.OpExecError, err:
11120             self.LogWarning("Failed to create volume %s (%s) on"
11121                             " node %s: %s",
11122                             new_disk.iv_name, new_disk, node, err)
11123         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11124                        (new_disk.size, new_disk.mode)))
11125       else:
11126         # change a given disk
11127         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11128         result.append(("disk.mode/%d" % disk_op,
11129                        disk_dict[constants.IDISK_MODE]))
11130
11131     if self.op.disk_template:
11132       r_shut = _ShutdownInstanceDisks(self, instance)
11133       if not r_shut:
11134         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11135                                  " proceed with disk template conversion")
11136       mode = (instance.disk_template, self.op.disk_template)
11137       try:
11138         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11139       except:
11140         self.cfg.ReleaseDRBDMinors(instance.name)
11141         raise
11142       result.append(("disk_template", self.op.disk_template))
11143
11144     # NIC changes
11145     for nic_op, nic_dict in self.op.nics:
11146       if nic_op == constants.DDM_REMOVE:
11147         # remove the last nic
11148         del instance.nics[-1]
11149         result.append(("nic.%d" % len(instance.nics), "remove"))
11150       elif nic_op == constants.DDM_ADD:
11151         # mac and bridge should be set, by now
11152         mac = nic_dict[constants.INIC_MAC]
11153         ip = nic_dict.get(constants.INIC_IP, None)
11154         nicparams = self.nic_pinst[constants.DDM_ADD]
11155         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11156         instance.nics.append(new_nic)
11157         result.append(("nic.%d" % (len(instance.nics) - 1),
11158                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11159                        (new_nic.mac, new_nic.ip,
11160                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11161                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11162                        )))
11163       else:
11164         for key in (constants.INIC_MAC, constants.INIC_IP):
11165           if key in nic_dict:
11166             setattr(instance.nics[nic_op], key, nic_dict[key])
11167         if nic_op in self.nic_pinst:
11168           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11169         for key, val in nic_dict.iteritems():
11170           result.append(("nic.%s/%d" % (key, nic_op), val))
11171
11172     # hvparams changes
11173     if self.op.hvparams:
11174       instance.hvparams = self.hv_inst
11175       for key, val in self.op.hvparams.iteritems():
11176         result.append(("hv/%s" % key, val))
11177
11178     # beparams changes
11179     if self.op.beparams:
11180       instance.beparams = self.be_inst
11181       for key, val in self.op.beparams.iteritems():
11182         result.append(("be/%s" % key, val))
11183
11184     # OS change
11185     if self.op.os_name:
11186       instance.os = self.op.os_name
11187
11188     # osparams changes
11189     if self.op.osparams:
11190       instance.osparams = self.os_inst
11191       for key, val in self.op.osparams.iteritems():
11192         result.append(("os/%s" % key, val))
11193
11194     self.cfg.Update(instance, feedback_fn)
11195
11196     return result
11197
11198   _DISK_CONVERSIONS = {
11199     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11200     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11201     }
11202
11203
11204 class LUInstanceChangeGroup(LogicalUnit):
11205   HPATH = "instance-change-group"
11206   HTYPE = constants.HTYPE_INSTANCE
11207   REQ_BGL = False
11208
11209   def ExpandNames(self):
11210     self.share_locks = _ShareAll()
11211     self.needed_locks = {
11212       locking.LEVEL_NODEGROUP: [],
11213       locking.LEVEL_NODE: [],
11214       }
11215
11216     self._ExpandAndLockInstance()
11217
11218     if self.op.target_groups:
11219       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11220                                   self.op.target_groups)
11221     else:
11222       self.req_target_uuids = None
11223
11224     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11225
11226   def DeclareLocks(self, level):
11227     if level == locking.LEVEL_NODEGROUP:
11228       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11229
11230       if self.req_target_uuids:
11231         lock_groups = set(self.req_target_uuids)
11232
11233         # Lock all groups used by instance optimistically; this requires going
11234         # via the node before it's locked, requiring verification later on
11235         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11236         lock_groups.update(instance_groups)
11237       else:
11238         # No target groups, need to lock all of them
11239         lock_groups = locking.ALL_SET
11240
11241       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11242
11243     elif level == locking.LEVEL_NODE:
11244       if self.req_target_uuids:
11245         # Lock all nodes used by instances
11246         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11247         self._LockInstancesNodes()
11248
11249         # Lock all nodes in all potential target groups
11250         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11251                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11252         member_nodes = [node_name
11253                         for group in lock_groups
11254                         for node_name in self.cfg.GetNodeGroup(group).members]
11255         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11256       else:
11257         # Lock all nodes as all groups are potential targets
11258         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11259
11260   def CheckPrereq(self):
11261     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11262     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11263     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11264
11265     assert (self.req_target_uuids is None or
11266             owned_groups.issuperset(self.req_target_uuids))
11267     assert owned_instances == set([self.op.instance_name])
11268
11269     # Get instance information
11270     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11271
11272     # Check if node groups for locked instance are still correct
11273     assert owned_nodes.issuperset(self.instance.all_nodes), \
11274       ("Instance %s's nodes changed while we kept the lock" %
11275        self.op.instance_name)
11276
11277     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11278                                            owned_groups)
11279
11280     if self.req_target_uuids:
11281       # User requested specific target groups
11282       self.target_uuids = self.req_target_uuids
11283     else:
11284       # All groups except those used by the instance are potential targets
11285       self.target_uuids = owned_groups - inst_groups
11286
11287     conflicting_groups = self.target_uuids & inst_groups
11288     if conflicting_groups:
11289       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11290                                  " used by the instance '%s'" %
11291                                  (utils.CommaJoin(conflicting_groups),
11292                                   self.op.instance_name),
11293                                  errors.ECODE_INVAL)
11294
11295     if not self.target_uuids:
11296       raise errors.OpPrereqError("There are no possible target groups",
11297                                  errors.ECODE_INVAL)
11298
11299   def BuildHooksEnv(self):
11300     """Build hooks env.
11301
11302     """
11303     assert self.target_uuids
11304
11305     env = {
11306       "TARGET_GROUPS": " ".join(self.target_uuids),
11307       }
11308
11309     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11310
11311     return env
11312
11313   def BuildHooksNodes(self):
11314     """Build hooks nodes.
11315
11316     """
11317     mn = self.cfg.GetMasterNode()
11318     return ([mn], [mn])
11319
11320   def Exec(self, feedback_fn):
11321     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11322
11323     assert instances == [self.op.instance_name], "Instance not locked"
11324
11325     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11326                      instances=instances, target_groups=list(self.target_uuids))
11327
11328     ial.Run(self.op.iallocator)
11329
11330     if not ial.success:
11331       raise errors.OpPrereqError("Can't compute solution for changing group of"
11332                                  " instance '%s' using iallocator '%s': %s" %
11333                                  (self.op.instance_name, self.op.iallocator,
11334                                   ial.info),
11335                                  errors.ECODE_NORES)
11336
11337     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11338
11339     self.LogInfo("Iallocator returned %s job(s) for changing group of"
11340                  " instance '%s'", len(jobs), self.op.instance_name)
11341
11342     return ResultWithJobs(jobs)
11343
11344
11345 class LUBackupQuery(NoHooksLU):
11346   """Query the exports list
11347
11348   """
11349   REQ_BGL = False
11350
11351   def ExpandNames(self):
11352     self.needed_locks = {}
11353     self.share_locks[locking.LEVEL_NODE] = 1
11354     if not self.op.nodes:
11355       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11356     else:
11357       self.needed_locks[locking.LEVEL_NODE] = \
11358         _GetWantedNodes(self, self.op.nodes)
11359
11360   def Exec(self, feedback_fn):
11361     """Compute the list of all the exported system images.
11362
11363     @rtype: dict
11364     @return: a dictionary with the structure node->(export-list)
11365         where export-list is a list of the instances exported on
11366         that node.
11367
11368     """
11369     self.nodes = self.owned_locks(locking.LEVEL_NODE)
11370     rpcresult = self.rpc.call_export_list(self.nodes)
11371     result = {}
11372     for node in rpcresult:
11373       if rpcresult[node].fail_msg:
11374         result[node] = False
11375       else:
11376         result[node] = rpcresult[node].payload
11377
11378     return result
11379
11380
11381 class LUBackupPrepare(NoHooksLU):
11382   """Prepares an instance for an export and returns useful information.
11383
11384   """
11385   REQ_BGL = False
11386
11387   def ExpandNames(self):
11388     self._ExpandAndLockInstance()
11389
11390   def CheckPrereq(self):
11391     """Check prerequisites.
11392
11393     """
11394     instance_name = self.op.instance_name
11395
11396     self.instance = self.cfg.GetInstanceInfo(instance_name)
11397     assert self.instance is not None, \
11398           "Cannot retrieve locked instance %s" % self.op.instance_name
11399     _CheckNodeOnline(self, self.instance.primary_node)
11400
11401     self._cds = _GetClusterDomainSecret()
11402
11403   def Exec(self, feedback_fn):
11404     """Prepares an instance for an export.
11405
11406     """
11407     instance = self.instance
11408
11409     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11410       salt = utils.GenerateSecret(8)
11411
11412       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11413       result = self.rpc.call_x509_cert_create(instance.primary_node,
11414                                               constants.RIE_CERT_VALIDITY)
11415       result.Raise("Can't create X509 key and certificate on %s" % result.node)
11416
11417       (name, cert_pem) = result.payload
11418
11419       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11420                                              cert_pem)
11421
11422       return {
11423         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11424         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11425                           salt),
11426         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11427         }
11428
11429     return None
11430
11431
11432 class LUBackupExport(LogicalUnit):
11433   """Export an instance to an image in the cluster.
11434
11435   """
11436   HPATH = "instance-export"
11437   HTYPE = constants.HTYPE_INSTANCE
11438   REQ_BGL = False
11439
11440   def CheckArguments(self):
11441     """Check the arguments.
11442
11443     """
11444     self.x509_key_name = self.op.x509_key_name
11445     self.dest_x509_ca_pem = self.op.destination_x509_ca
11446
11447     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11448       if not self.x509_key_name:
11449         raise errors.OpPrereqError("Missing X509 key name for encryption",
11450                                    errors.ECODE_INVAL)
11451
11452       if not self.dest_x509_ca_pem:
11453         raise errors.OpPrereqError("Missing destination X509 CA",
11454                                    errors.ECODE_INVAL)
11455
11456   def ExpandNames(self):
11457     self._ExpandAndLockInstance()
11458
11459     # Lock all nodes for local exports
11460     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11461       # FIXME: lock only instance primary and destination node
11462       #
11463       # Sad but true, for now we have do lock all nodes, as we don't know where
11464       # the previous export might be, and in this LU we search for it and
11465       # remove it from its current node. In the future we could fix this by:
11466       #  - making a tasklet to search (share-lock all), then create the
11467       #    new one, then one to remove, after
11468       #  - removing the removal operation altogether
11469       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11470
11471   def DeclareLocks(self, level):
11472     """Last minute lock declaration."""
11473     # All nodes are locked anyway, so nothing to do here.
11474
11475   def BuildHooksEnv(self):
11476     """Build hooks env.
11477
11478     This will run on the master, primary node and target node.
11479
11480     """
11481     env = {
11482       "EXPORT_MODE": self.op.mode,
11483       "EXPORT_NODE": self.op.target_node,
11484       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11485       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11486       # TODO: Generic function for boolean env variables
11487       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11488       }
11489
11490     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11491
11492     return env
11493
11494   def BuildHooksNodes(self):
11495     """Build hooks nodes.
11496
11497     """
11498     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11499
11500     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11501       nl.append(self.op.target_node)
11502
11503     return (nl, nl)
11504
11505   def CheckPrereq(self):
11506     """Check prerequisites.
11507
11508     This checks that the instance and node names are valid.
11509
11510     """
11511     instance_name = self.op.instance_name
11512
11513     self.instance = self.cfg.GetInstanceInfo(instance_name)
11514     assert self.instance is not None, \
11515           "Cannot retrieve locked instance %s" % self.op.instance_name
11516     _CheckNodeOnline(self, self.instance.primary_node)
11517
11518     if (self.op.remove_instance and self.instance.admin_up and
11519         not self.op.shutdown):
11520       raise errors.OpPrereqError("Can not remove instance without shutting it"
11521                                  " down before")
11522
11523     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11524       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11525       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11526       assert self.dst_node is not None
11527
11528       _CheckNodeOnline(self, self.dst_node.name)
11529       _CheckNodeNotDrained(self, self.dst_node.name)
11530
11531       self._cds = None
11532       self.dest_disk_info = None
11533       self.dest_x509_ca = None
11534
11535     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11536       self.dst_node = None
11537
11538       if len(self.op.target_node) != len(self.instance.disks):
11539         raise errors.OpPrereqError(("Received destination information for %s"
11540                                     " disks, but instance %s has %s disks") %
11541                                    (len(self.op.target_node), instance_name,
11542                                     len(self.instance.disks)),
11543                                    errors.ECODE_INVAL)
11544
11545       cds = _GetClusterDomainSecret()
11546
11547       # Check X509 key name
11548       try:
11549         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11550       except (TypeError, ValueError), err:
11551         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11552
11553       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11554         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11555                                    errors.ECODE_INVAL)
11556
11557       # Load and verify CA
11558       try:
11559         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11560       except OpenSSL.crypto.Error, err:
11561         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11562                                    (err, ), errors.ECODE_INVAL)
11563
11564       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11565       if errcode is not None:
11566         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11567                                    (msg, ), errors.ECODE_INVAL)
11568
11569       self.dest_x509_ca = cert
11570
11571       # Verify target information
11572       disk_info = []
11573       for idx, disk_data in enumerate(self.op.target_node):
11574         try:
11575           (host, port, magic) = \
11576             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11577         except errors.GenericError, err:
11578           raise errors.OpPrereqError("Target info for disk %s: %s" %
11579                                      (idx, err), errors.ECODE_INVAL)
11580
11581         disk_info.append((host, port, magic))
11582
11583       assert len(disk_info) == len(self.op.target_node)
11584       self.dest_disk_info = disk_info
11585
11586     else:
11587       raise errors.ProgrammerError("Unhandled export mode %r" %
11588                                    self.op.mode)
11589
11590     # instance disk type verification
11591     # TODO: Implement export support for file-based disks
11592     for disk in self.instance.disks:
11593       if disk.dev_type == constants.LD_FILE:
11594         raise errors.OpPrereqError("Export not supported for instances with"
11595                                    " file-based disks", errors.ECODE_INVAL)
11596
11597   def _CleanupExports(self, feedback_fn):
11598     """Removes exports of current instance from all other nodes.
11599
11600     If an instance in a cluster with nodes A..D was exported to node C, its
11601     exports will be removed from the nodes A, B and D.
11602
11603     """
11604     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11605
11606     nodelist = self.cfg.GetNodeList()
11607     nodelist.remove(self.dst_node.name)
11608
11609     # on one-node clusters nodelist will be empty after the removal
11610     # if we proceed the backup would be removed because OpBackupQuery
11611     # substitutes an empty list with the full cluster node list.
11612     iname = self.instance.name
11613     if nodelist:
11614       feedback_fn("Removing old exports for instance %s" % iname)
11615       exportlist = self.rpc.call_export_list(nodelist)
11616       for node in exportlist:
11617         if exportlist[node].fail_msg:
11618           continue
11619         if iname in exportlist[node].payload:
11620           msg = self.rpc.call_export_remove(node, iname).fail_msg
11621           if msg:
11622             self.LogWarning("Could not remove older export for instance %s"
11623                             " on node %s: %s", iname, node, msg)
11624
11625   def Exec(self, feedback_fn):
11626     """Export an instance to an image in the cluster.
11627
11628     """
11629     assert self.op.mode in constants.EXPORT_MODES
11630
11631     instance = self.instance
11632     src_node = instance.primary_node
11633
11634     if self.op.shutdown:
11635       # shutdown the instance, but not the disks
11636       feedback_fn("Shutting down instance %s" % instance.name)
11637       result = self.rpc.call_instance_shutdown(src_node, instance,
11638                                                self.op.shutdown_timeout)
11639       # TODO: Maybe ignore failures if ignore_remove_failures is set
11640       result.Raise("Could not shutdown instance %s on"
11641                    " node %s" % (instance.name, src_node))
11642
11643     # set the disks ID correctly since call_instance_start needs the
11644     # correct drbd minor to create the symlinks
11645     for disk in instance.disks:
11646       self.cfg.SetDiskID(disk, src_node)
11647
11648     activate_disks = (not instance.admin_up)
11649
11650     if activate_disks:
11651       # Activate the instance disks if we'exporting a stopped instance
11652       feedback_fn("Activating disks for %s" % instance.name)
11653       _StartInstanceDisks(self, instance, None)
11654
11655     try:
11656       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11657                                                      instance)
11658
11659       helper.CreateSnapshots()
11660       try:
11661         if (self.op.shutdown and instance.admin_up and
11662             not self.op.remove_instance):
11663           assert not activate_disks
11664           feedback_fn("Starting instance %s" % instance.name)
11665           result = self.rpc.call_instance_start(src_node, instance,
11666                                                 None, None, False)
11667           msg = result.fail_msg
11668           if msg:
11669             feedback_fn("Failed to start instance: %s" % msg)
11670             _ShutdownInstanceDisks(self, instance)
11671             raise errors.OpExecError("Could not start instance: %s" % msg)
11672
11673         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11674           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11675         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11676           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11677           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11678
11679           (key_name, _, _) = self.x509_key_name
11680
11681           dest_ca_pem = \
11682             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11683                                             self.dest_x509_ca)
11684
11685           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11686                                                      key_name, dest_ca_pem,
11687                                                      timeouts)
11688       finally:
11689         helper.Cleanup()
11690
11691       # Check for backwards compatibility
11692       assert len(dresults) == len(instance.disks)
11693       assert compat.all(isinstance(i, bool) for i in dresults), \
11694              "Not all results are boolean: %r" % dresults
11695
11696     finally:
11697       if activate_disks:
11698         feedback_fn("Deactivating disks for %s" % instance.name)
11699         _ShutdownInstanceDisks(self, instance)
11700
11701     if not (compat.all(dresults) and fin_resu):
11702       failures = []
11703       if not fin_resu:
11704         failures.append("export finalization")
11705       if not compat.all(dresults):
11706         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11707                                if not dsk)
11708         failures.append("disk export: disk(s) %s" % fdsk)
11709
11710       raise errors.OpExecError("Export failed, errors in %s" %
11711                                utils.CommaJoin(failures))
11712
11713     # At this point, the export was successful, we can cleanup/finish
11714
11715     # Remove instance if requested
11716     if self.op.remove_instance:
11717       feedback_fn("Removing instance %s" % instance.name)
11718       _RemoveInstance(self, feedback_fn, instance,
11719                       self.op.ignore_remove_failures)
11720
11721     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11722       self._CleanupExports(feedback_fn)
11723
11724     return fin_resu, dresults
11725
11726
11727 class LUBackupRemove(NoHooksLU):
11728   """Remove exports related to the named instance.
11729
11730   """
11731   REQ_BGL = False
11732
11733   def ExpandNames(self):
11734     self.needed_locks = {}
11735     # We need all nodes to be locked in order for RemoveExport to work, but we
11736     # don't need to lock the instance itself, as nothing will happen to it (and
11737     # we can remove exports also for a removed instance)
11738     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11739
11740   def Exec(self, feedback_fn):
11741     """Remove any export.
11742
11743     """
11744     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11745     # If the instance was not found we'll try with the name that was passed in.
11746     # This will only work if it was an FQDN, though.
11747     fqdn_warn = False
11748     if not instance_name:
11749       fqdn_warn = True
11750       instance_name = self.op.instance_name
11751
11752     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11753     exportlist = self.rpc.call_export_list(locked_nodes)
11754     found = False
11755     for node in exportlist:
11756       msg = exportlist[node].fail_msg
11757       if msg:
11758         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11759         continue
11760       if instance_name in exportlist[node].payload:
11761         found = True
11762         result = self.rpc.call_export_remove(node, instance_name)
11763         msg = result.fail_msg
11764         if msg:
11765           logging.error("Could not remove export for instance %s"
11766                         " on node %s: %s", instance_name, node, msg)
11767
11768     if fqdn_warn and not found:
11769       feedback_fn("Export not found. If trying to remove an export belonging"
11770                   " to a deleted instance please use its Fully Qualified"
11771                   " Domain Name.")
11772
11773
11774 class LUGroupAdd(LogicalUnit):
11775   """Logical unit for creating node groups.
11776
11777   """
11778   HPATH = "group-add"
11779   HTYPE = constants.HTYPE_GROUP
11780   REQ_BGL = False
11781
11782   def ExpandNames(self):
11783     # We need the new group's UUID here so that we can create and acquire the
11784     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11785     # that it should not check whether the UUID exists in the configuration.
11786     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11787     self.needed_locks = {}
11788     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11789
11790   def CheckPrereq(self):
11791     """Check prerequisites.
11792
11793     This checks that the given group name is not an existing node group
11794     already.
11795
11796     """
11797     try:
11798       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11799     except errors.OpPrereqError:
11800       pass
11801     else:
11802       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11803                                  " node group (UUID: %s)" %
11804                                  (self.op.group_name, existing_uuid),
11805                                  errors.ECODE_EXISTS)
11806
11807     if self.op.ndparams:
11808       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11809
11810   def BuildHooksEnv(self):
11811     """Build hooks env.
11812
11813     """
11814     return {
11815       "GROUP_NAME": self.op.group_name,
11816       }
11817
11818   def BuildHooksNodes(self):
11819     """Build hooks nodes.
11820
11821     """
11822     mn = self.cfg.GetMasterNode()
11823     return ([mn], [mn])
11824
11825   def Exec(self, feedback_fn):
11826     """Add the node group to the cluster.
11827
11828     """
11829     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11830                                   uuid=self.group_uuid,
11831                                   alloc_policy=self.op.alloc_policy,
11832                                   ndparams=self.op.ndparams)
11833
11834     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11835     del self.remove_locks[locking.LEVEL_NODEGROUP]
11836
11837
11838 class LUGroupAssignNodes(NoHooksLU):
11839   """Logical unit for assigning nodes to groups.
11840
11841   """
11842   REQ_BGL = False
11843
11844   def ExpandNames(self):
11845     # These raise errors.OpPrereqError on their own:
11846     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11847     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11848
11849     # We want to lock all the affected nodes and groups. We have readily
11850     # available the list of nodes, and the *destination* group. To gather the
11851     # list of "source" groups, we need to fetch node information later on.
11852     self.needed_locks = {
11853       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11854       locking.LEVEL_NODE: self.op.nodes,
11855       }
11856
11857   def DeclareLocks(self, level):
11858     if level == locking.LEVEL_NODEGROUP:
11859       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11860
11861       # Try to get all affected nodes' groups without having the group or node
11862       # lock yet. Needs verification later in the code flow.
11863       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11864
11865       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11866
11867   def CheckPrereq(self):
11868     """Check prerequisites.
11869
11870     """
11871     assert self.needed_locks[locking.LEVEL_NODEGROUP]
11872     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11873             frozenset(self.op.nodes))
11874
11875     expected_locks = (set([self.group_uuid]) |
11876                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11877     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11878     if actual_locks != expected_locks:
11879       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11880                                " current groups are '%s', used to be '%s'" %
11881                                (utils.CommaJoin(expected_locks),
11882                                 utils.CommaJoin(actual_locks)))
11883
11884     self.node_data = self.cfg.GetAllNodesInfo()
11885     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11886     instance_data = self.cfg.GetAllInstancesInfo()
11887
11888     if self.group is None:
11889       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11890                                (self.op.group_name, self.group_uuid))
11891
11892     (new_splits, previous_splits) = \
11893       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11894                                              for node in self.op.nodes],
11895                                             self.node_data, instance_data)
11896
11897     if new_splits:
11898       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11899
11900       if not self.op.force:
11901         raise errors.OpExecError("The following instances get split by this"
11902                                  " change and --force was not given: %s" %
11903                                  fmt_new_splits)
11904       else:
11905         self.LogWarning("This operation will split the following instances: %s",
11906                         fmt_new_splits)
11907
11908         if previous_splits:
11909           self.LogWarning("In addition, these already-split instances continue"
11910                           " to be split across groups: %s",
11911                           utils.CommaJoin(utils.NiceSort(previous_splits)))
11912
11913   def Exec(self, feedback_fn):
11914     """Assign nodes to a new group.
11915
11916     """
11917     for node in self.op.nodes:
11918       self.node_data[node].group = self.group_uuid
11919
11920     # FIXME: Depends on side-effects of modifying the result of
11921     # C{cfg.GetAllNodesInfo}
11922
11923     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11924
11925   @staticmethod
11926   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11927     """Check for split instances after a node assignment.
11928
11929     This method considers a series of node assignments as an atomic operation,
11930     and returns information about split instances after applying the set of
11931     changes.
11932
11933     In particular, it returns information about newly split instances, and
11934     instances that were already split, and remain so after the change.
11935
11936     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11937     considered.
11938
11939     @type changes: list of (node_name, new_group_uuid) pairs.
11940     @param changes: list of node assignments to consider.
11941     @param node_data: a dict with data for all nodes
11942     @param instance_data: a dict with all instances to consider
11943     @rtype: a two-tuple
11944     @return: a list of instances that were previously okay and result split as a
11945       consequence of this change, and a list of instances that were previously
11946       split and this change does not fix.
11947
11948     """
11949     changed_nodes = dict((node, group) for node, group in changes
11950                          if node_data[node].group != group)
11951
11952     all_split_instances = set()
11953     previously_split_instances = set()
11954
11955     def InstanceNodes(instance):
11956       return [instance.primary_node] + list(instance.secondary_nodes)
11957
11958     for inst in instance_data.values():
11959       if inst.disk_template not in constants.DTS_INT_MIRROR:
11960         continue
11961
11962       instance_nodes = InstanceNodes(inst)
11963
11964       if len(set(node_data[node].group for node in instance_nodes)) > 1:
11965         previously_split_instances.add(inst.name)
11966
11967       if len(set(changed_nodes.get(node, node_data[node].group)
11968                  for node in instance_nodes)) > 1:
11969         all_split_instances.add(inst.name)
11970
11971     return (list(all_split_instances - previously_split_instances),
11972             list(previously_split_instances & all_split_instances))
11973
11974
11975 class _GroupQuery(_QueryBase):
11976   FIELDS = query.GROUP_FIELDS
11977
11978   def ExpandNames(self, lu):
11979     lu.needed_locks = {}
11980
11981     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11982     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11983
11984     if not self.names:
11985       self.wanted = [name_to_uuid[name]
11986                      for name in utils.NiceSort(name_to_uuid.keys())]
11987     else:
11988       # Accept names to be either names or UUIDs.
11989       missing = []
11990       self.wanted = []
11991       all_uuid = frozenset(self._all_groups.keys())
11992
11993       for name in self.names:
11994         if name in all_uuid:
11995           self.wanted.append(name)
11996         elif name in name_to_uuid:
11997           self.wanted.append(name_to_uuid[name])
11998         else:
11999           missing.append(name)
12000
12001       if missing:
12002         raise errors.OpPrereqError("Some groups do not exist: %s" %
12003                                    utils.CommaJoin(missing),
12004                                    errors.ECODE_NOENT)
12005
12006   def DeclareLocks(self, lu, level):
12007     pass
12008
12009   def _GetQueryData(self, lu):
12010     """Computes the list of node groups and their attributes.
12011
12012     """
12013     do_nodes = query.GQ_NODE in self.requested_data
12014     do_instances = query.GQ_INST in self.requested_data
12015
12016     group_to_nodes = None
12017     group_to_instances = None
12018
12019     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12020     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12021     # latter GetAllInstancesInfo() is not enough, for we have to go through
12022     # instance->node. Hence, we will need to process nodes even if we only need
12023     # instance information.
12024     if do_nodes or do_instances:
12025       all_nodes = lu.cfg.GetAllNodesInfo()
12026       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12027       node_to_group = {}
12028
12029       for node in all_nodes.values():
12030         if node.group in group_to_nodes:
12031           group_to_nodes[node.group].append(node.name)
12032           node_to_group[node.name] = node.group
12033
12034       if do_instances:
12035         all_instances = lu.cfg.GetAllInstancesInfo()
12036         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12037
12038         for instance in all_instances.values():
12039           node = instance.primary_node
12040           if node in node_to_group:
12041             group_to_instances[node_to_group[node]].append(instance.name)
12042
12043         if not do_nodes:
12044           # Do not pass on node information if it was not requested.
12045           group_to_nodes = None
12046
12047     return query.GroupQueryData([self._all_groups[uuid]
12048                                  for uuid in self.wanted],
12049                                 group_to_nodes, group_to_instances)
12050
12051
12052 class LUGroupQuery(NoHooksLU):
12053   """Logical unit for querying node groups.
12054
12055   """
12056   REQ_BGL = False
12057
12058   def CheckArguments(self):
12059     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12060                           self.op.output_fields, False)
12061
12062   def ExpandNames(self):
12063     self.gq.ExpandNames(self)
12064
12065   def DeclareLocks(self, level):
12066     self.gq.DeclareLocks(self, level)
12067
12068   def Exec(self, feedback_fn):
12069     return self.gq.OldStyleQuery(self)
12070
12071
12072 class LUGroupSetParams(LogicalUnit):
12073   """Modifies the parameters of a node group.
12074
12075   """
12076   HPATH = "group-modify"
12077   HTYPE = constants.HTYPE_GROUP
12078   REQ_BGL = False
12079
12080   def CheckArguments(self):
12081     all_changes = [
12082       self.op.ndparams,
12083       self.op.alloc_policy,
12084       ]
12085
12086     if all_changes.count(None) == len(all_changes):
12087       raise errors.OpPrereqError("Please pass at least one modification",
12088                                  errors.ECODE_INVAL)
12089
12090   def ExpandNames(self):
12091     # This raises errors.OpPrereqError on its own:
12092     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12093
12094     self.needed_locks = {
12095       locking.LEVEL_NODEGROUP: [self.group_uuid],
12096       }
12097
12098   def CheckPrereq(self):
12099     """Check prerequisites.
12100
12101     """
12102     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12103
12104     if self.group is None:
12105       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12106                                (self.op.group_name, self.group_uuid))
12107
12108     if self.op.ndparams:
12109       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12110       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12111       self.new_ndparams = new_ndparams
12112
12113   def BuildHooksEnv(self):
12114     """Build hooks env.
12115
12116     """
12117     return {
12118       "GROUP_NAME": self.op.group_name,
12119       "NEW_ALLOC_POLICY": self.op.alloc_policy,
12120       }
12121
12122   def BuildHooksNodes(self):
12123     """Build hooks nodes.
12124
12125     """
12126     mn = self.cfg.GetMasterNode()
12127     return ([mn], [mn])
12128
12129   def Exec(self, feedback_fn):
12130     """Modifies the node group.
12131
12132     """
12133     result = []
12134
12135     if self.op.ndparams:
12136       self.group.ndparams = self.new_ndparams
12137       result.append(("ndparams", str(self.group.ndparams)))
12138
12139     if self.op.alloc_policy:
12140       self.group.alloc_policy = self.op.alloc_policy
12141
12142     self.cfg.Update(self.group, feedback_fn)
12143     return result
12144
12145
12146
12147 class LUGroupRemove(LogicalUnit):
12148   HPATH = "group-remove"
12149   HTYPE = constants.HTYPE_GROUP
12150   REQ_BGL = False
12151
12152   def ExpandNames(self):
12153     # This will raises errors.OpPrereqError on its own:
12154     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12155     self.needed_locks = {
12156       locking.LEVEL_NODEGROUP: [self.group_uuid],
12157       }
12158
12159   def CheckPrereq(self):
12160     """Check prerequisites.
12161
12162     This checks that the given group name exists as a node group, that is
12163     empty (i.e., contains no nodes), and that is not the last group of the
12164     cluster.
12165
12166     """
12167     # Verify that the group is empty.
12168     group_nodes = [node.name
12169                    for node in self.cfg.GetAllNodesInfo().values()
12170                    if node.group == self.group_uuid]
12171
12172     if group_nodes:
12173       raise errors.OpPrereqError("Group '%s' not empty, has the following"
12174                                  " nodes: %s" %
12175                                  (self.op.group_name,
12176                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
12177                                  errors.ECODE_STATE)
12178
12179     # Verify the cluster would not be left group-less.
12180     if len(self.cfg.GetNodeGroupList()) == 1:
12181       raise errors.OpPrereqError("Group '%s' is the only group,"
12182                                  " cannot be removed" %
12183                                  self.op.group_name,
12184                                  errors.ECODE_STATE)
12185
12186   def BuildHooksEnv(self):
12187     """Build hooks env.
12188
12189     """
12190     return {
12191       "GROUP_NAME": self.op.group_name,
12192       }
12193
12194   def BuildHooksNodes(self):
12195     """Build hooks nodes.
12196
12197     """
12198     mn = self.cfg.GetMasterNode()
12199     return ([mn], [mn])
12200
12201   def Exec(self, feedback_fn):
12202     """Remove the node group.
12203
12204     """
12205     try:
12206       self.cfg.RemoveNodeGroup(self.group_uuid)
12207     except errors.ConfigurationError:
12208       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12209                                (self.op.group_name, self.group_uuid))
12210
12211     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12212
12213
12214 class LUGroupRename(LogicalUnit):
12215   HPATH = "group-rename"
12216   HTYPE = constants.HTYPE_GROUP
12217   REQ_BGL = False
12218
12219   def ExpandNames(self):
12220     # This raises errors.OpPrereqError on its own:
12221     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12222
12223     self.needed_locks = {
12224       locking.LEVEL_NODEGROUP: [self.group_uuid],
12225       }
12226
12227   def CheckPrereq(self):
12228     """Check prerequisites.
12229
12230     Ensures requested new name is not yet used.
12231
12232     """
12233     try:
12234       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12235     except errors.OpPrereqError:
12236       pass
12237     else:
12238       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12239                                  " node group (UUID: %s)" %
12240                                  (self.op.new_name, new_name_uuid),
12241                                  errors.ECODE_EXISTS)
12242
12243   def BuildHooksEnv(self):
12244     """Build hooks env.
12245
12246     """
12247     return {
12248       "OLD_NAME": self.op.group_name,
12249       "NEW_NAME": self.op.new_name,
12250       }
12251
12252   def BuildHooksNodes(self):
12253     """Build hooks nodes.
12254
12255     """
12256     mn = self.cfg.GetMasterNode()
12257
12258     all_nodes = self.cfg.GetAllNodesInfo()
12259     all_nodes.pop(mn, None)
12260
12261     run_nodes = [mn]
12262     run_nodes.extend(node.name for node in all_nodes.values()
12263                      if node.group == self.group_uuid)
12264
12265     return (run_nodes, run_nodes)
12266
12267   def Exec(self, feedback_fn):
12268     """Rename the node group.
12269
12270     """
12271     group = self.cfg.GetNodeGroup(self.group_uuid)
12272
12273     if group is None:
12274       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12275                                (self.op.group_name, self.group_uuid))
12276
12277     group.name = self.op.new_name
12278     self.cfg.Update(group, feedback_fn)
12279
12280     return self.op.new_name
12281
12282
12283 class LUGroupEvacuate(LogicalUnit):
12284   HPATH = "group-evacuate"
12285   HTYPE = constants.HTYPE_GROUP
12286   REQ_BGL = False
12287
12288   def ExpandNames(self):
12289     # This raises errors.OpPrereqError on its own:
12290     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12291
12292     if self.op.target_groups:
12293       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12294                                   self.op.target_groups)
12295     else:
12296       self.req_target_uuids = []
12297
12298     if self.group_uuid in self.req_target_uuids:
12299       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12300                                  " as a target group (targets are %s)" %
12301                                  (self.group_uuid,
12302                                   utils.CommaJoin(self.req_target_uuids)),
12303                                  errors.ECODE_INVAL)
12304
12305     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12306
12307     self.share_locks = _ShareAll()
12308     self.needed_locks = {
12309       locking.LEVEL_INSTANCE: [],
12310       locking.LEVEL_NODEGROUP: [],
12311       locking.LEVEL_NODE: [],
12312       }
12313
12314   def DeclareLocks(self, level):
12315     if level == locking.LEVEL_INSTANCE:
12316       assert not self.needed_locks[locking.LEVEL_INSTANCE]
12317
12318       # Lock instances optimistically, needs verification once node and group
12319       # locks have been acquired
12320       self.needed_locks[locking.LEVEL_INSTANCE] = \
12321         self.cfg.GetNodeGroupInstances(self.group_uuid)
12322
12323     elif level == locking.LEVEL_NODEGROUP:
12324       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12325
12326       if self.req_target_uuids:
12327         lock_groups = set([self.group_uuid] + self.req_target_uuids)
12328
12329         # Lock all groups used by instances optimistically; this requires going
12330         # via the node before it's locked, requiring verification later on
12331         lock_groups.update(group_uuid
12332                            for instance_name in
12333                              self.owned_locks(locking.LEVEL_INSTANCE)
12334                            for group_uuid in
12335                              self.cfg.GetInstanceNodeGroups(instance_name))
12336       else:
12337         # No target groups, need to lock all of them
12338         lock_groups = locking.ALL_SET
12339
12340       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12341
12342     elif level == locking.LEVEL_NODE:
12343       # This will only lock the nodes in the group to be evacuated which
12344       # contain actual instances
12345       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12346       self._LockInstancesNodes()
12347
12348       # Lock all nodes in group to be evacuated and target groups
12349       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12350       assert self.group_uuid in owned_groups
12351       member_nodes = [node_name
12352                       for group in owned_groups
12353                       for node_name in self.cfg.GetNodeGroup(group).members]
12354       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12355
12356   def CheckPrereq(self):
12357     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12358     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12359     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12360
12361     assert owned_groups.issuperset(self.req_target_uuids)
12362     assert self.group_uuid in owned_groups
12363
12364     # Check if locked instances are still correct
12365     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12366
12367     # Get instance information
12368     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12369
12370     # Check if node groups for locked instances are still correct
12371     for instance_name in owned_instances:
12372       inst = self.instances[instance_name]
12373       assert owned_nodes.issuperset(inst.all_nodes), \
12374         "Instance %s's nodes changed while we kept the lock" % instance_name
12375
12376       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12377                                              owned_groups)
12378
12379       assert self.group_uuid in inst_groups, \
12380         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12381
12382     if self.req_target_uuids:
12383       # User requested specific target groups
12384       self.target_uuids = self.req_target_uuids
12385     else:
12386       # All groups except the one to be evacuated are potential targets
12387       self.target_uuids = [group_uuid for group_uuid in owned_groups
12388                            if group_uuid != self.group_uuid]
12389
12390       if not self.target_uuids:
12391         raise errors.OpPrereqError("There are no possible target groups",
12392                                    errors.ECODE_INVAL)
12393
12394   def BuildHooksEnv(self):
12395     """Build hooks env.
12396
12397     """
12398     return {
12399       "GROUP_NAME": self.op.group_name,
12400       "TARGET_GROUPS": " ".join(self.target_uuids),
12401       }
12402
12403   def BuildHooksNodes(self):
12404     """Build hooks nodes.
12405
12406     """
12407     mn = self.cfg.GetMasterNode()
12408
12409     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12410
12411     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12412
12413     return (run_nodes, run_nodes)
12414
12415   def Exec(self, feedback_fn):
12416     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12417
12418     assert self.group_uuid not in self.target_uuids
12419
12420     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12421                      instances=instances, target_groups=self.target_uuids)
12422
12423     ial.Run(self.op.iallocator)
12424
12425     if not ial.success:
12426       raise errors.OpPrereqError("Can't compute group evacuation using"
12427                                  " iallocator '%s': %s" %
12428                                  (self.op.iallocator, ial.info),
12429                                  errors.ECODE_NORES)
12430
12431     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12432
12433     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12434                  len(jobs), self.op.group_name)
12435
12436     return ResultWithJobs(jobs)
12437
12438
12439 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12440   """Generic tags LU.
12441
12442   This is an abstract class which is the parent of all the other tags LUs.
12443
12444   """
12445   def ExpandNames(self):
12446     self.group_uuid = None
12447     self.needed_locks = {}
12448     if self.op.kind == constants.TAG_NODE:
12449       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12450       self.needed_locks[locking.LEVEL_NODE] = self.op.name
12451     elif self.op.kind == constants.TAG_INSTANCE:
12452       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12453       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12454     elif self.op.kind == constants.TAG_NODEGROUP:
12455       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12456
12457     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12458     # not possible to acquire the BGL based on opcode parameters)
12459
12460   def CheckPrereq(self):
12461     """Check prerequisites.
12462
12463     """
12464     if self.op.kind == constants.TAG_CLUSTER:
12465       self.target = self.cfg.GetClusterInfo()
12466     elif self.op.kind == constants.TAG_NODE:
12467       self.target = self.cfg.GetNodeInfo(self.op.name)
12468     elif self.op.kind == constants.TAG_INSTANCE:
12469       self.target = self.cfg.GetInstanceInfo(self.op.name)
12470     elif self.op.kind == constants.TAG_NODEGROUP:
12471       self.target = self.cfg.GetNodeGroup(self.group_uuid)
12472     else:
12473       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12474                                  str(self.op.kind), errors.ECODE_INVAL)
12475
12476
12477 class LUTagsGet(TagsLU):
12478   """Returns the tags of a given object.
12479
12480   """
12481   REQ_BGL = False
12482
12483   def ExpandNames(self):
12484     TagsLU.ExpandNames(self)
12485
12486     # Share locks as this is only a read operation
12487     self.share_locks = _ShareAll()
12488
12489   def Exec(self, feedback_fn):
12490     """Returns the tag list.
12491
12492     """
12493     return list(self.target.GetTags())
12494
12495
12496 class LUTagsSearch(NoHooksLU):
12497   """Searches the tags for a given pattern.
12498
12499   """
12500   REQ_BGL = False
12501
12502   def ExpandNames(self):
12503     self.needed_locks = {}
12504
12505   def CheckPrereq(self):
12506     """Check prerequisites.
12507
12508     This checks the pattern passed for validity by compiling it.
12509
12510     """
12511     try:
12512       self.re = re.compile(self.op.pattern)
12513     except re.error, err:
12514       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12515                                  (self.op.pattern, err), errors.ECODE_INVAL)
12516
12517   def Exec(self, feedback_fn):
12518     """Returns the tag list.
12519
12520     """
12521     cfg = self.cfg
12522     tgts = [("/cluster", cfg.GetClusterInfo())]
12523     ilist = cfg.GetAllInstancesInfo().values()
12524     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12525     nlist = cfg.GetAllNodesInfo().values()
12526     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12527     tgts.extend(("/nodegroup/%s" % n.name, n)
12528                 for n in cfg.GetAllNodeGroupsInfo().values())
12529     results = []
12530     for path, target in tgts:
12531       for tag in target.GetTags():
12532         if self.re.search(tag):
12533           results.append((path, tag))
12534     return results
12535
12536
12537 class LUTagsSet(TagsLU):
12538   """Sets a tag on a given object.
12539
12540   """
12541   REQ_BGL = False
12542
12543   def CheckPrereq(self):
12544     """Check prerequisites.
12545
12546     This checks the type and length of the tag name and value.
12547
12548     """
12549     TagsLU.CheckPrereq(self)
12550     for tag in self.op.tags:
12551       objects.TaggableObject.ValidateTag(tag)
12552
12553   def Exec(self, feedback_fn):
12554     """Sets the tag.
12555
12556     """
12557     try:
12558       for tag in self.op.tags:
12559         self.target.AddTag(tag)
12560     except errors.TagError, err:
12561       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12562     self.cfg.Update(self.target, feedback_fn)
12563
12564
12565 class LUTagsDel(TagsLU):
12566   """Delete a list of tags from a given object.
12567
12568   """
12569   REQ_BGL = False
12570
12571   def CheckPrereq(self):
12572     """Check prerequisites.
12573
12574     This checks that we have the given tag.
12575
12576     """
12577     TagsLU.CheckPrereq(self)
12578     for tag in self.op.tags:
12579       objects.TaggableObject.ValidateTag(tag)
12580     del_tags = frozenset(self.op.tags)
12581     cur_tags = self.target.GetTags()
12582
12583     diff_tags = del_tags - cur_tags
12584     if diff_tags:
12585       diff_names = ("'%s'" % i for i in sorted(diff_tags))
12586       raise errors.OpPrereqError("Tag(s) %s not found" %
12587                                  (utils.CommaJoin(diff_names), ),
12588                                  errors.ECODE_NOENT)
12589
12590   def Exec(self, feedback_fn):
12591     """Remove the tag from the object.
12592
12593     """
12594     for tag in self.op.tags:
12595       self.target.RemoveTag(tag)
12596     self.cfg.Update(self.target, feedback_fn)
12597
12598
12599 class LUTestDelay(NoHooksLU):
12600   """Sleep for a specified amount of time.
12601
12602   This LU sleeps on the master and/or nodes for a specified amount of
12603   time.
12604
12605   """
12606   REQ_BGL = False
12607
12608   def ExpandNames(self):
12609     """Expand names and set required locks.
12610
12611     This expands the node list, if any.
12612
12613     """
12614     self.needed_locks = {}
12615     if self.op.on_nodes:
12616       # _GetWantedNodes can be used here, but is not always appropriate to use
12617       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12618       # more information.
12619       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12620       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12621
12622   def _TestDelay(self):
12623     """Do the actual sleep.
12624
12625     """
12626     if self.op.on_master:
12627       if not utils.TestDelay(self.op.duration):
12628         raise errors.OpExecError("Error during master delay test")
12629     if self.op.on_nodes:
12630       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12631       for node, node_result in result.items():
12632         node_result.Raise("Failure during rpc call to node %s" % node)
12633
12634   def Exec(self, feedback_fn):
12635     """Execute the test delay opcode, with the wanted repetitions.
12636
12637     """
12638     if self.op.repeat == 0:
12639       self._TestDelay()
12640     else:
12641       top_value = self.op.repeat - 1
12642       for i in range(self.op.repeat):
12643         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12644         self._TestDelay()
12645
12646
12647 class LUTestJqueue(NoHooksLU):
12648   """Utility LU to test some aspects of the job queue.
12649
12650   """
12651   REQ_BGL = False
12652
12653   # Must be lower than default timeout for WaitForJobChange to see whether it
12654   # notices changed jobs
12655   _CLIENT_CONNECT_TIMEOUT = 20.0
12656   _CLIENT_CONFIRM_TIMEOUT = 60.0
12657
12658   @classmethod
12659   def _NotifyUsingSocket(cls, cb, errcls):
12660     """Opens a Unix socket and waits for another program to connect.
12661
12662     @type cb: callable
12663     @param cb: Callback to send socket name to client
12664     @type errcls: class
12665     @param errcls: Exception class to use for errors
12666
12667     """
12668     # Using a temporary directory as there's no easy way to create temporary
12669     # sockets without writing a custom loop around tempfile.mktemp and
12670     # socket.bind
12671     tmpdir = tempfile.mkdtemp()
12672     try:
12673       tmpsock = utils.PathJoin(tmpdir, "sock")
12674
12675       logging.debug("Creating temporary socket at %s", tmpsock)
12676       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12677       try:
12678         sock.bind(tmpsock)
12679         sock.listen(1)
12680
12681         # Send details to client
12682         cb(tmpsock)
12683
12684         # Wait for client to connect before continuing
12685         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12686         try:
12687           (conn, _) = sock.accept()
12688         except socket.error, err:
12689           raise errcls("Client didn't connect in time (%s)" % err)
12690       finally:
12691         sock.close()
12692     finally:
12693       # Remove as soon as client is connected
12694       shutil.rmtree(tmpdir)
12695
12696     # Wait for client to close
12697     try:
12698       try:
12699         # pylint: disable-msg=E1101
12700         # Instance of '_socketobject' has no ... member
12701         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12702         conn.recv(1)
12703       except socket.error, err:
12704         raise errcls("Client failed to confirm notification (%s)" % err)
12705     finally:
12706       conn.close()
12707
12708   def _SendNotification(self, test, arg, sockname):
12709     """Sends a notification to the client.
12710
12711     @type test: string
12712     @param test: Test name
12713     @param arg: Test argument (depends on test)
12714     @type sockname: string
12715     @param sockname: Socket path
12716
12717     """
12718     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12719
12720   def _Notify(self, prereq, test, arg):
12721     """Notifies the client of a test.
12722
12723     @type prereq: bool
12724     @param prereq: Whether this is a prereq-phase test
12725     @type test: string
12726     @param test: Test name
12727     @param arg: Test argument (depends on test)
12728
12729     """
12730     if prereq:
12731       errcls = errors.OpPrereqError
12732     else:
12733       errcls = errors.OpExecError
12734
12735     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12736                                                   test, arg),
12737                                    errcls)
12738
12739   def CheckArguments(self):
12740     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12741     self.expandnames_calls = 0
12742
12743   def ExpandNames(self):
12744     checkargs_calls = getattr(self, "checkargs_calls", 0)
12745     if checkargs_calls < 1:
12746       raise errors.ProgrammerError("CheckArguments was not called")
12747
12748     self.expandnames_calls += 1
12749
12750     if self.op.notify_waitlock:
12751       self._Notify(True, constants.JQT_EXPANDNAMES, None)
12752
12753     self.LogInfo("Expanding names")
12754
12755     # Get lock on master node (just to get a lock, not for a particular reason)
12756     self.needed_locks = {
12757       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12758       }
12759
12760   def Exec(self, feedback_fn):
12761     if self.expandnames_calls < 1:
12762       raise errors.ProgrammerError("ExpandNames was not called")
12763
12764     if self.op.notify_exec:
12765       self._Notify(False, constants.JQT_EXEC, None)
12766
12767     self.LogInfo("Executing")
12768
12769     if self.op.log_messages:
12770       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12771       for idx, msg in enumerate(self.op.log_messages):
12772         self.LogInfo("Sending log message %s", idx + 1)
12773         feedback_fn(constants.JQT_MSGPREFIX + msg)
12774         # Report how many test messages have been sent
12775         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12776
12777     if self.op.fail:
12778       raise errors.OpExecError("Opcode failure was requested")
12779
12780     return True
12781
12782
12783 class IAllocator(object):
12784   """IAllocator framework.
12785
12786   An IAllocator instance has three sets of attributes:
12787     - cfg that is needed to query the cluster
12788     - input data (all members of the _KEYS class attribute are required)
12789     - four buffer attributes (in|out_data|text), that represent the
12790       input (to the external script) in text and data structure format,
12791       and the output from it, again in two formats
12792     - the result variables from the script (success, info, nodes) for
12793       easy usage
12794
12795   """
12796   # pylint: disable-msg=R0902
12797   # lots of instance attributes
12798
12799   def __init__(self, cfg, rpc, mode, **kwargs):
12800     self.cfg = cfg
12801     self.rpc = rpc
12802     # init buffer variables
12803     self.in_text = self.out_text = self.in_data = self.out_data = None
12804     # init all input fields so that pylint is happy
12805     self.mode = mode
12806     self.memory = self.disks = self.disk_template = None
12807     self.os = self.tags = self.nics = self.vcpus = None
12808     self.hypervisor = None
12809     self.relocate_from = None
12810     self.name = None
12811     self.instances = None
12812     self.evac_mode = None
12813     self.target_groups = []
12814     # computed fields
12815     self.required_nodes = None
12816     # init result fields
12817     self.success = self.info = self.result = None
12818
12819     try:
12820       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12821     except KeyError:
12822       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12823                                    " IAllocator" % self.mode)
12824
12825     keyset = [n for (n, _) in keydata]
12826
12827     for key in kwargs:
12828       if key not in keyset:
12829         raise errors.ProgrammerError("Invalid input parameter '%s' to"
12830                                      " IAllocator" % key)
12831       setattr(self, key, kwargs[key])
12832
12833     for key in keyset:
12834       if key not in kwargs:
12835         raise errors.ProgrammerError("Missing input parameter '%s' to"
12836                                      " IAllocator" % key)
12837     self._BuildInputData(compat.partial(fn, self), keydata)
12838
12839   def _ComputeClusterData(self):
12840     """Compute the generic allocator input data.
12841
12842     This is the data that is independent of the actual operation.
12843
12844     """
12845     cfg = self.cfg
12846     cluster_info = cfg.GetClusterInfo()
12847     # cluster data
12848     data = {
12849       "version": constants.IALLOCATOR_VERSION,
12850       "cluster_name": cfg.GetClusterName(),
12851       "cluster_tags": list(cluster_info.GetTags()),
12852       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12853       # we don't have job IDs
12854       }
12855     ninfo = cfg.GetAllNodesInfo()
12856     iinfo = cfg.GetAllInstancesInfo().values()
12857     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12858
12859     # node data
12860     node_list = [n.name for n in ninfo.values() if n.vm_capable]
12861
12862     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12863       hypervisor_name = self.hypervisor
12864     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12865       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12866     else:
12867       hypervisor_name = cluster_info.enabled_hypervisors[0]
12868
12869     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12870                                         hypervisor_name)
12871     node_iinfo = \
12872       self.rpc.call_all_instances_info(node_list,
12873                                        cluster_info.enabled_hypervisors)
12874
12875     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12876
12877     config_ndata = self._ComputeBasicNodeData(ninfo)
12878     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12879                                                  i_list, config_ndata)
12880     assert len(data["nodes"]) == len(ninfo), \
12881         "Incomplete node data computed"
12882
12883     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12884
12885     self.in_data = data
12886
12887   @staticmethod
12888   def _ComputeNodeGroupData(cfg):
12889     """Compute node groups data.
12890
12891     """
12892     ng = dict((guuid, {
12893       "name": gdata.name,
12894       "alloc_policy": gdata.alloc_policy,
12895       })
12896       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12897
12898     return ng
12899
12900   @staticmethod
12901   def _ComputeBasicNodeData(node_cfg):
12902     """Compute global node data.
12903
12904     @rtype: dict
12905     @returns: a dict of name: (node dict, node config)
12906
12907     """
12908     # fill in static (config-based) values
12909     node_results = dict((ninfo.name, {
12910       "tags": list(ninfo.GetTags()),
12911       "primary_ip": ninfo.primary_ip,
12912       "secondary_ip": ninfo.secondary_ip,
12913       "offline": ninfo.offline,
12914       "drained": ninfo.drained,
12915       "master_candidate": ninfo.master_candidate,
12916       "group": ninfo.group,
12917       "master_capable": ninfo.master_capable,
12918       "vm_capable": ninfo.vm_capable,
12919       })
12920       for ninfo in node_cfg.values())
12921
12922     return node_results
12923
12924   @staticmethod
12925   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12926                               node_results):
12927     """Compute global node data.
12928
12929     @param node_results: the basic node structures as filled from the config
12930
12931     """
12932     # make a copy of the current dict
12933     node_results = dict(node_results)
12934     for nname, nresult in node_data.items():
12935       assert nname in node_results, "Missing basic data for node %s" % nname
12936       ninfo = node_cfg[nname]
12937
12938       if not (ninfo.offline or ninfo.drained):
12939         nresult.Raise("Can't get data for node %s" % nname)
12940         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12941                                 nname)
12942         remote_info = nresult.payload
12943
12944         for attr in ["memory_total", "memory_free", "memory_dom0",
12945                      "vg_size", "vg_free", "cpu_total"]:
12946           if attr not in remote_info:
12947             raise errors.OpExecError("Node '%s' didn't return attribute"
12948                                      " '%s'" % (nname, attr))
12949           if not isinstance(remote_info[attr], int):
12950             raise errors.OpExecError("Node '%s' returned invalid value"
12951                                      " for '%s': %s" %
12952                                      (nname, attr, remote_info[attr]))
12953         # compute memory used by primary instances
12954         i_p_mem = i_p_up_mem = 0
12955         for iinfo, beinfo in i_list:
12956           if iinfo.primary_node == nname:
12957             i_p_mem += beinfo[constants.BE_MEMORY]
12958             if iinfo.name not in node_iinfo[nname].payload:
12959               i_used_mem = 0
12960             else:
12961               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12962             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12963             remote_info["memory_free"] -= max(0, i_mem_diff)
12964
12965             if iinfo.admin_up:
12966               i_p_up_mem += beinfo[constants.BE_MEMORY]
12967
12968         # compute memory used by instances
12969         pnr_dyn = {
12970           "total_memory": remote_info["memory_total"],
12971           "reserved_memory": remote_info["memory_dom0"],
12972           "free_memory": remote_info["memory_free"],
12973           "total_disk": remote_info["vg_size"],
12974           "free_disk": remote_info["vg_free"],
12975           "total_cpus": remote_info["cpu_total"],
12976           "i_pri_memory": i_p_mem,
12977           "i_pri_up_memory": i_p_up_mem,
12978           }
12979         pnr_dyn.update(node_results[nname])
12980         node_results[nname] = pnr_dyn
12981
12982     return node_results
12983
12984   @staticmethod
12985   def _ComputeInstanceData(cluster_info, i_list):
12986     """Compute global instance data.
12987
12988     """
12989     instance_data = {}
12990     for iinfo, beinfo in i_list:
12991       nic_data = []
12992       for nic in iinfo.nics:
12993         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12994         nic_dict = {
12995           "mac": nic.mac,
12996           "ip": nic.ip,
12997           "mode": filled_params[constants.NIC_MODE],
12998           "link": filled_params[constants.NIC_LINK],
12999           }
13000         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13001           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13002         nic_data.append(nic_dict)
13003       pir = {
13004         "tags": list(iinfo.GetTags()),
13005         "admin_up": iinfo.admin_up,
13006         "vcpus": beinfo[constants.BE_VCPUS],
13007         "memory": beinfo[constants.BE_MEMORY],
13008         "os": iinfo.os,
13009         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13010         "nics": nic_data,
13011         "disks": [{constants.IDISK_SIZE: dsk.size,
13012                    constants.IDISK_MODE: dsk.mode}
13013                   for dsk in iinfo.disks],
13014         "disk_template": iinfo.disk_template,
13015         "hypervisor": iinfo.hypervisor,
13016         }
13017       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13018                                                  pir["disks"])
13019       instance_data[iinfo.name] = pir
13020
13021     return instance_data
13022
13023   def _AddNewInstance(self):
13024     """Add new instance data to allocator structure.
13025
13026     This in combination with _AllocatorGetClusterData will create the
13027     correct structure needed as input for the allocator.
13028
13029     The checks for the completeness of the opcode must have already been
13030     done.
13031
13032     """
13033     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13034
13035     if self.disk_template in constants.DTS_INT_MIRROR:
13036       self.required_nodes = 2
13037     else:
13038       self.required_nodes = 1
13039
13040     request = {
13041       "name": self.name,
13042       "disk_template": self.disk_template,
13043       "tags": self.tags,
13044       "os": self.os,
13045       "vcpus": self.vcpus,
13046       "memory": self.memory,
13047       "disks": self.disks,
13048       "disk_space_total": disk_space,
13049       "nics": self.nics,
13050       "required_nodes": self.required_nodes,
13051       "hypervisor": self.hypervisor,
13052       }
13053
13054     return request
13055
13056   def _AddRelocateInstance(self):
13057     """Add relocate instance data to allocator structure.
13058
13059     This in combination with _IAllocatorGetClusterData will create the
13060     correct structure needed as input for the allocator.
13061
13062     The checks for the completeness of the opcode must have already been
13063     done.
13064
13065     """
13066     instance = self.cfg.GetInstanceInfo(self.name)
13067     if instance is None:
13068       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13069                                    " IAllocator" % self.name)
13070
13071     if instance.disk_template not in constants.DTS_MIRRORED:
13072       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13073                                  errors.ECODE_INVAL)
13074
13075     if instance.disk_template in constants.DTS_INT_MIRROR and \
13076         len(instance.secondary_nodes) != 1:
13077       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13078                                  errors.ECODE_STATE)
13079
13080     self.required_nodes = 1
13081     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13082     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13083
13084     request = {
13085       "name": self.name,
13086       "disk_space_total": disk_space,
13087       "required_nodes": self.required_nodes,
13088       "relocate_from": self.relocate_from,
13089       }
13090     return request
13091
13092   def _AddNodeEvacuate(self):
13093     """Get data for node-evacuate requests.
13094
13095     """
13096     return {
13097       "instances": self.instances,
13098       "evac_mode": self.evac_mode,
13099       }
13100
13101   def _AddChangeGroup(self):
13102     """Get data for node-evacuate requests.
13103
13104     """
13105     return {
13106       "instances": self.instances,
13107       "target_groups": self.target_groups,
13108       }
13109
13110   def _BuildInputData(self, fn, keydata):
13111     """Build input data structures.
13112
13113     """
13114     self._ComputeClusterData()
13115
13116     request = fn()
13117     request["type"] = self.mode
13118     for keyname, keytype in keydata:
13119       if keyname not in request:
13120         raise errors.ProgrammerError("Request parameter %s is missing" %
13121                                      keyname)
13122       val = request[keyname]
13123       if not keytype(val):
13124         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13125                                      " validation, value %s, expected"
13126                                      " type %s" % (keyname, val, keytype))
13127     self.in_data["request"] = request
13128
13129     self.in_text = serializer.Dump(self.in_data)
13130
13131   _STRING_LIST = ht.TListOf(ht.TString)
13132   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13133      # pylint: disable-msg=E1101
13134      # Class '...' has no 'OP_ID' member
13135      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13136                           opcodes.OpInstanceMigrate.OP_ID,
13137                           opcodes.OpInstanceReplaceDisks.OP_ID])
13138      })))
13139
13140   _NEVAC_MOVED = \
13141     ht.TListOf(ht.TAnd(ht.TIsLength(3),
13142                        ht.TItems([ht.TNonEmptyString,
13143                                   ht.TNonEmptyString,
13144                                   ht.TListOf(ht.TNonEmptyString),
13145                                  ])))
13146   _NEVAC_FAILED = \
13147     ht.TListOf(ht.TAnd(ht.TIsLength(2),
13148                        ht.TItems([ht.TNonEmptyString,
13149                                   ht.TMaybeString,
13150                                  ])))
13151   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13152                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13153
13154   _MODE_DATA = {
13155     constants.IALLOCATOR_MODE_ALLOC:
13156       (_AddNewInstance,
13157        [
13158         ("name", ht.TString),
13159         ("memory", ht.TInt),
13160         ("disks", ht.TListOf(ht.TDict)),
13161         ("disk_template", ht.TString),
13162         ("os", ht.TString),
13163         ("tags", _STRING_LIST),
13164         ("nics", ht.TListOf(ht.TDict)),
13165         ("vcpus", ht.TInt),
13166         ("hypervisor", ht.TString),
13167         ], ht.TList),
13168     constants.IALLOCATOR_MODE_RELOC:
13169       (_AddRelocateInstance,
13170        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13171        ht.TList),
13172      constants.IALLOCATOR_MODE_NODE_EVAC:
13173       (_AddNodeEvacuate, [
13174         ("instances", _STRING_LIST),
13175         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13176         ], _NEVAC_RESULT),
13177      constants.IALLOCATOR_MODE_CHG_GROUP:
13178       (_AddChangeGroup, [
13179         ("instances", _STRING_LIST),
13180         ("target_groups", _STRING_LIST),
13181         ], _NEVAC_RESULT),
13182     }
13183
13184   def Run(self, name, validate=True, call_fn=None):
13185     """Run an instance allocator and return the results.
13186
13187     """
13188     if call_fn is None:
13189       call_fn = self.rpc.call_iallocator_runner
13190
13191     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13192     result.Raise("Failure while running the iallocator script")
13193
13194     self.out_text = result.payload
13195     if validate:
13196       self._ValidateResult()
13197
13198   def _ValidateResult(self):
13199     """Process the allocator results.
13200
13201     This will process and if successful save the result in
13202     self.out_data and the other parameters.
13203
13204     """
13205     try:
13206       rdict = serializer.Load(self.out_text)
13207     except Exception, err:
13208       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13209
13210     if not isinstance(rdict, dict):
13211       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13212
13213     # TODO: remove backwards compatiblity in later versions
13214     if "nodes" in rdict and "result" not in rdict:
13215       rdict["result"] = rdict["nodes"]
13216       del rdict["nodes"]
13217
13218     for key in "success", "info", "result":
13219       if key not in rdict:
13220         raise errors.OpExecError("Can't parse iallocator results:"
13221                                  " missing key '%s'" % key)
13222       setattr(self, key, rdict[key])
13223
13224     if not self._result_check(self.result):
13225       raise errors.OpExecError("Iallocator returned invalid result,"
13226                                " expected %s, got %s" %
13227                                (self._result_check, self.result),
13228                                errors.ECODE_INVAL)
13229
13230     if self.mode == constants.IALLOCATOR_MODE_RELOC:
13231       assert self.relocate_from is not None
13232       assert self.required_nodes == 1
13233
13234       node2group = dict((name, ndata["group"])
13235                         for (name, ndata) in self.in_data["nodes"].items())
13236
13237       fn = compat.partial(self._NodesToGroups, node2group,
13238                           self.in_data["nodegroups"])
13239
13240       instance = self.cfg.GetInstanceInfo(self.name)
13241       request_groups = fn(self.relocate_from + [instance.primary_node])
13242       result_groups = fn(rdict["result"] + [instance.primary_node])
13243
13244       if self.success and not set(result_groups).issubset(request_groups):
13245         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13246                                  " differ from original groups (%s)" %
13247                                  (utils.CommaJoin(result_groups),
13248                                   utils.CommaJoin(request_groups)))
13249
13250     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13251       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13252
13253     self.out_data = rdict
13254
13255   @staticmethod
13256   def _NodesToGroups(node2group, groups, nodes):
13257     """Returns a list of unique group names for a list of nodes.
13258
13259     @type node2group: dict
13260     @param node2group: Map from node name to group UUID
13261     @type groups: dict
13262     @param groups: Group information
13263     @type nodes: list
13264     @param nodes: Node names
13265
13266     """
13267     result = set()
13268
13269     for node in nodes:
13270       try:
13271         group_uuid = node2group[node]
13272       except KeyError:
13273         # Ignore unknown node
13274         pass
13275       else:
13276         try:
13277           group = groups[group_uuid]
13278         except KeyError:
13279           # Can't find group, let's use UUID
13280           group_name = group_uuid
13281         else:
13282           group_name = group["name"]
13283
13284         result.add(group_name)
13285
13286     return sorted(result)
13287
13288
13289 class LUTestAllocator(NoHooksLU):
13290   """Run allocator tests.
13291
13292   This LU runs the allocator tests
13293
13294   """
13295   def CheckPrereq(self):
13296     """Check prerequisites.
13297
13298     This checks the opcode parameters depending on the director and mode test.
13299
13300     """
13301     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13302       for attr in ["memory", "disks", "disk_template",
13303                    "os", "tags", "nics", "vcpus"]:
13304         if not hasattr(self.op, attr):
13305           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13306                                      attr, errors.ECODE_INVAL)
13307       iname = self.cfg.ExpandInstanceName(self.op.name)
13308       if iname is not None:
13309         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13310                                    iname, errors.ECODE_EXISTS)
13311       if not isinstance(self.op.nics, list):
13312         raise errors.OpPrereqError("Invalid parameter 'nics'",
13313                                    errors.ECODE_INVAL)
13314       if not isinstance(self.op.disks, list):
13315         raise errors.OpPrereqError("Invalid parameter 'disks'",
13316                                    errors.ECODE_INVAL)
13317       for row in self.op.disks:
13318         if (not isinstance(row, dict) or
13319             constants.IDISK_SIZE not in row or
13320             not isinstance(row[constants.IDISK_SIZE], int) or
13321             constants.IDISK_MODE not in row or
13322             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13323           raise errors.OpPrereqError("Invalid contents of the 'disks'"
13324                                      " parameter", errors.ECODE_INVAL)
13325       if self.op.hypervisor is None:
13326         self.op.hypervisor = self.cfg.GetHypervisorType()
13327     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13328       fname = _ExpandInstanceName(self.cfg, self.op.name)
13329       self.op.name = fname
13330       self.relocate_from = \
13331           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13332     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13333                           constants.IALLOCATOR_MODE_NODE_EVAC):
13334       if not self.op.instances:
13335         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13336       self.op.instances = _GetWantedInstances(self, self.op.instances)
13337     else:
13338       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13339                                  self.op.mode, errors.ECODE_INVAL)
13340
13341     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13342       if self.op.allocator is None:
13343         raise errors.OpPrereqError("Missing allocator name",
13344                                    errors.ECODE_INVAL)
13345     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13346       raise errors.OpPrereqError("Wrong allocator test '%s'" %
13347                                  self.op.direction, errors.ECODE_INVAL)
13348
13349   def Exec(self, feedback_fn):
13350     """Run the allocator test.
13351
13352     """
13353     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13354       ial = IAllocator(self.cfg, self.rpc,
13355                        mode=self.op.mode,
13356                        name=self.op.name,
13357                        memory=self.op.memory,
13358                        disks=self.op.disks,
13359                        disk_template=self.op.disk_template,
13360                        os=self.op.os,
13361                        tags=self.op.tags,
13362                        nics=self.op.nics,
13363                        vcpus=self.op.vcpus,
13364                        hypervisor=self.op.hypervisor,
13365                        )
13366     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13367       ial = IAllocator(self.cfg, self.rpc,
13368                        mode=self.op.mode,
13369                        name=self.op.name,
13370                        relocate_from=list(self.relocate_from),
13371                        )
13372     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13373       ial = IAllocator(self.cfg, self.rpc,
13374                        mode=self.op.mode,
13375                        instances=self.op.instances,
13376                        target_groups=self.op.target_groups)
13377     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13378       ial = IAllocator(self.cfg, self.rpc,
13379                        mode=self.op.mode,
13380                        instances=self.op.instances,
13381                        evac_mode=self.op.evac_mode)
13382     else:
13383       raise errors.ProgrammerError("Uncatched mode %s in"
13384                                    " LUTestAllocator.Exec", self.op.mode)
13385
13386     if self.op.direction == constants.IALLOCATOR_DIR_IN:
13387       result = ial.in_text
13388     else:
13389       ial.Run(self.op.allocator, validate=False)
13390       result = ial.out_text
13391     return result
13392
13393
13394 #: Query type implementations
13395 _QUERY_IMPL = {
13396   constants.QR_INSTANCE: _InstanceQuery,
13397   constants.QR_NODE: _NodeQuery,
13398   constants.QR_GROUP: _GroupQuery,
13399   constants.QR_OS: _OsQuery,
13400   }
13401
13402 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13403
13404
13405 def _GetQueryImplementation(name):
13406   """Returns the implemtnation for a query type.
13407
13408   @param name: Query type, must be one of L{constants.QR_VIA_OP}
13409
13410   """
13411   try:
13412     return _QUERY_IMPL[name]
13413   except KeyError:
13414     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13415                                errors.ECODE_INVAL)