4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have way too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable-msg=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable-msg=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
137 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as purely a lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166 later methods can no longer worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
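# A further illustrative sketch (not from the original docstring): to take
# all node locks in shared rather than exclusive mode, as described above
# for self.share_locks:
self.share_locks[locking.LEVEL_NODE] = 1
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
}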
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296 hook should run after the execution. No nodes should be returned as an
297 empty list (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# could-be-a-function warnings
325 # pylint: disable-msg=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
393 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLu.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
@raise errors.ProgrammerError: if the nodes parameter is of the wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
@raise errors.OpPrereqError: if the instances parameter is of the wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
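# Worked example (illustrative, not part of the original module): with
# old_params = {"a": 1, "b": 2} and
# update_dict = {"a": constants.VALUE_DEFAULT, "c": 3} and the default
# use_default=True, the result is {"b": 2, "c": 3}: "a" is removed so it
# falls back to its default, "c" is added and "b" is left untouched.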
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
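# Usage sketch (illustrative): an LU that has narrowed down its node list
# can drop the locks it no longer needs with something like
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# or release everything it owns at that level with
#   _ReleaseLocks(self, locking.LEVEL_NODE)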
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable-msg=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
786 This will ensure that instances don't get customised versions of
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846 @raise errors.OpPrereqError: if the node is not supporting the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
852 prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
879 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
881 raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
888 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instance."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055 """Builds instance related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
# the new node will increase mc_max by one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
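# Worked example (illustrative): with candidate_pool_size = 10, 5 current
# master candidates and 5 desired ones, adding this node gives
# mc_should = min(5 + 1, 10) = 6 > mc_now = 5, so the node promotes itself.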
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121 """Check that the brigdes needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135 """Check that the brigdes needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_stop_master(master, False)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable-msg=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
1402 @param instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
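# Illustrative shape of the result (hypothetical names and values):
#   [("cluster", "xen-pvm", {...cluster defaults...}),
#    ("os debian-image", "xen-pvm", {...os overrides applied...}),
#    ("instance web1.example.com", "xen-pvm", {...fully filled params...})]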
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
1438 TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
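# Illustrative example (hypothetical node and message): with
# self.op.error_codes set, an entry is emitted in the parseable form
#   " - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups"
# otherwise the simpler human-readable form built above is used.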
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1514 """Verifies the cluster config.
1519 def _VerifyHVP(self, hvp_data):
1520 """Verifies locally the syntax of the hypervisor parameters.
1523 for item, hv_name, hv_params in hvp_data:
1524 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1527 hv_class = hypervisor.GetHypervisor(hv_name)
1528 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1529 hv_class.CheckParameterSyntax(hv_params)
1530 except errors.GenericError, err:
1531 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1533 def ExpandNames(self):
1534 # Information can be safely retrieved as the BGL is acquired in exclusive
1536 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1537 self.all_node_info = self.cfg.GetAllNodesInfo()
1538 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1539 self.needed_locks = {}
1541 def Exec(self, feedback_fn):
1542 """Verify integrity of cluster, performing various test on nodes.
1546 self._feedback_fn = feedback_fn
1548 feedback_fn("* Verifying cluster config")
1550 for msg in self.cfg.VerifyConfig():
1551 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1553 feedback_fn("* Verifying cluster certificate files")
1555 for cert_filename in constants.ALL_CERT_FILES:
1556 (errcode, msg) = _VerifyCertificate(cert_filename)
1557 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1559 feedback_fn("* Verifying hypervisor parameters")
1561 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1562 self.all_inst_info.values()))
1564 feedback_fn("* Verifying all nodes belong to an existing group")
1566 # We do this verification here because, should this bogus circumstance
1567 # occur, it would never be caught by VerifyGroup, which only acts on
1568 # nodes/instances reachable from existing node groups.
1570 dangling_nodes = set(node.name for node in self.all_node_info.values()
1571 if node.group not in self.all_group_info)
1573 dangling_instances = {}
1574 no_node_instances = []
1576 for inst in self.all_inst_info.values():
1577 if inst.primary_node in dangling_nodes:
1578 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1579 elif inst.primary_node not in self.all_node_info:
1580 no_node_instances.append(inst.name)
1585 utils.CommaJoin(dangling_instances.get(node.name,
1587 for node in dangling_nodes]
1589 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1590 "the following nodes (and their instances) belong to a non"
1591 " existing group: %s", utils.CommaJoin(pretty_dangling))
1593 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1594 "the following instances have a non-existing primary-node:"
1595 " %s", utils.CommaJoin(no_node_instances))
1597 return (not self.bad, [g.name for g in self.all_group_info.values()])
1600 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1601 """Verifies the status of a node group.
1604 HPATH = "cluster-verify"
1605 HTYPE = constants.HTYPE_CLUSTER
1608 _HOOKS_INDENT_RE = re.compile("^", re.M)
1610 class NodeImage(object):
1611 """A class representing the logical and physical status of a node.
1614 @ivar name: the node name to which this object refers
1615 @ivar volumes: a structure as returned from
1616 L{ganeti.backend.GetVolumeList} (runtime)
1617 @ivar instances: a list of running instances (runtime)
1618 @ivar pinst: list of configured primary instances (config)
1619 @ivar sinst: list of configured secondary instances (config)
1620 @ivar sbp: dictionary of {primary-node: list of instances} for all
1621 instances for which this node is secondary (config)
1622 @ivar mfree: free memory, as reported by hypervisor (runtime)
1623 @ivar dfree: free disk, as reported by the node (runtime)
1624 @ivar offline: the offline status (config)
1625 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call was successful (overall,
1627 not whether the individual keys were correct) (runtime)
1628 @type lvm_fail: boolean
1629 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1630 @type hyp_fail: boolean
1631 @ivar hyp_fail: whether the RPC call didn't return the instance list
1632 @type ghost: boolean
1633 @ivar ghost: whether this is a known node or not (config)
1634 @type os_fail: boolean
1635 @ivar os_fail: whether the RPC call didn't return valid OS data
1637 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1638 @type vm_capable: boolean
1639 @ivar vm_capable: whether the node can host instances
1642 def __init__(self, offline=False, name=None, vm_capable=True):
1651 self.offline = offline
1652 self.vm_capable = vm_capable
1653 self.rpc_fail = False
1654 self.lvm_fail = False
1655 self.hyp_fail = False
1657 self.os_fail = False
1660 def ExpandNames(self):
1661 # This raises errors.OpPrereqError on its own:
1662 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1664 # Get instances in node group; this is unsafe and needs verification later
1665 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1667 self.needed_locks = {
1668 locking.LEVEL_INSTANCE: inst_names,
1669 locking.LEVEL_NODEGROUP: [self.group_uuid],
1670 locking.LEVEL_NODE: [],
1673 self.share_locks = _ShareAll()
1675 def DeclareLocks(self, level):
1676 if level == locking.LEVEL_NODE:
1677 # Get members of node group; this is unsafe and needs verification later
1678 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1680 all_inst_info = self.cfg.GetAllInstancesInfo()
1682 # In Exec(), we warn about mirrored instances that have primary and
1683 # secondary living in separate node groups. To fully verify that
1684 # volumes for these instances are healthy, we will need to do an
1685 # extra call to their secondaries. We ensure here those nodes will
1687 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1688 # Important: access only the instances whose lock is owned
1689 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1690 nodes.update(all_inst_info[inst].secondary_nodes)
1692 self.needed_locks[locking.LEVEL_NODE] = nodes
1694 def CheckPrereq(self):
1695 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1696 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1699 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1701 unlocked_instances = \
1702 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1705 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1706 utils.CommaJoin(unlocked_nodes))
1708 if unlocked_instances:
1709 raise errors.OpPrereqError("Missing lock for instances: %s" %
1710 utils.CommaJoin(unlocked_instances))
1712 self.all_node_info = self.cfg.GetAllNodesInfo()
1713 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1715 self.my_node_names = utils.NiceSort(group_nodes)
1716 self.my_inst_names = utils.NiceSort(group_instances)
1718 self.my_node_info = dict((name, self.all_node_info[name])
1719 for name in self.my_node_names)
1721 self.my_inst_info = dict((name, self.all_inst_info[name])
1722 for name in self.my_inst_names)
1724 # We detect here the nodes that will need the extra RPC calls for verifying
1725 # split LV volumes; they should be locked.
1726 extra_lv_nodes = set()
1728 for inst in self.my_inst_info.values():
1729 if inst.disk_template in constants.DTS_INT_MIRROR:
1730 group = self.my_node_info[inst.primary_node].group
1731 for nname in inst.secondary_nodes:
1732 if self.all_node_info[nname].group != group:
1733 extra_lv_nodes.add(nname)
1735 unlocked_lv_nodes = \
1736 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1738 if unlocked_lv_nodes:
1739 raise errors.OpPrereqError("these nodes could be locked: %s" %
1740 utils.CommaJoin(unlocked_lv_nodes))
1741 self.extra_lv_nodes = list(extra_lv_nodes)
1743 def _VerifyNode(self, ninfo, nresult):
1744 """Perform some basic validation on data returned from a node.
1746 - check the result data structure is well formed and has all the
1748 - check ganeti version
1750 @type ninfo: L{objects.Node}
1751 @param ninfo: the node to check
1752 @param nresult: the results from the node
1754 @return: whether overall this call was successful (and we can expect
reasonable values in the response)
1759 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1761 # main result, nresult should be a non-empty dict
1762 test = not nresult or not isinstance(nresult, dict)
1763 _ErrorIf(test, self.ENODERPC, node,
1764 "unable to verify node: no data returned")
1768 # compares ganeti version
1769 local_version = constants.PROTOCOL_VERSION
1770 remote_version = nresult.get("version", None)
1771 test = not (remote_version and
1772 isinstance(remote_version, (list, tuple)) and
1773 len(remote_version) == 2)
1774 _ErrorIf(test, self.ENODERPC, node,
1775 "connection to node returned invalid data")
1779 test = local_version != remote_version[0]
1780 _ErrorIf(test, self.ENODEVERSION, node,
1781 "incompatible protocol versions: master %s,"
1782 " node %s", local_version, remote_version[0])
1786 # node seems compatible, we can actually try to look into its results
1788 # full package version
1789 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1790 self.ENODEVERSION, node,
1791 "software version mismatch: master %s, node %s",
1792 constants.RELEASE_VERSION, remote_version[1],
1793 code=self.ETYPE_WARNING)
1795 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1796 if ninfo.vm_capable and isinstance(hyp_result, dict):
1797 for hv_name, hv_result in hyp_result.iteritems():
1798 test = hv_result is not None
1799 _ErrorIf(test, self.ENODEHV, node,
1800 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1802 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1803 if ninfo.vm_capable and isinstance(hvp_result, list):
1804 for item, hv_name, hv_result in hvp_result:
1805 _ErrorIf(True, self.ENODEHV, node,
1806 "hypervisor %s parameter verify failure (source %s): %s",
1807 hv_name, item, hv_result)
1809 test = nresult.get(constants.NV_NODESETUP,
1810 ["Missing NODESETUP results"])
1811 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1816 def _VerifyNodeTime(self, ninfo, nresult,
1817 nvinfo_starttime, nvinfo_endtime):
1818 """Check the node time.
1820 @type ninfo: L{objects.Node}
1821 @param ninfo: the node to check
1822 @param nresult: the remote results for the node
1823 @param nvinfo_starttime: the start time of the RPC call
1824 @param nvinfo_endtime: the end time of the RPC call
1828 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1830 ntime = nresult.get(constants.NV_TIME, None)
1832 ntime_merged = utils.MergeTime(ntime)
1833 except (ValueError, TypeError):
1834 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1837 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1838 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1839 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1840 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1844 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1845 "Node time diverges by at least %s from master node time",
1848 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1849 """Check the node LVM results.
1851 @type ninfo: L{objects.Node}
1852 @param ninfo: the node to check
1853 @param nresult: the remote results for the node
1854 @param vg_name: the configured VG name
1861 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1863 # checks vg existence and size > 20G
1864 vglist = nresult.get(constants.NV_VGLIST, None)
1866 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1868 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1869 constants.MIN_VG_SIZE)
1870 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1873 pvlist = nresult.get(constants.NV_PVLIST, None)
1874 test = pvlist is None
1875 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1877 # check that ':' is not present in PV names, since it's a
1878 # special character for lvcreate (denotes the range of PEs to
1880 for _, pvname, owner_vg in pvlist:
1881 test = ":" in pvname
1882 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1883 " '%s' of VG '%s'", pvname, owner_vg)
1885 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1886 """Check the node bridges.
1888 @type ninfo: L{objects.Node}
1889 @param ninfo: the node to check
1890 @param nresult: the remote results for the node
1891 @param bridges: the expected list of bridges
1898 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1900 missing = nresult.get(constants.NV_BRIDGES, None)
1901 test = not isinstance(missing, list)
1902 _ErrorIf(test, self.ENODENET, node,
1903 "did not return valid bridge information")
1905 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1906 utils.CommaJoin(sorted(missing)))
1908 def _VerifyNodeNetwork(self, ninfo, nresult):
1909 """Check the node network connectivity results.
1911 @type ninfo: L{objects.Node}
1912 @param ninfo: the node to check
1913 @param nresult: the remote results for the node
1917 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1919 test = constants.NV_NODELIST not in nresult
1920 _ErrorIf(test, self.ENODESSH, node,
1921 "node hasn't returned node ssh connectivity data")
1923 if nresult[constants.NV_NODELIST]:
1924 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1925 _ErrorIf(True, self.ENODESSH, node,
1926 "ssh communication with node '%s': %s", a_node, a_msg)
1928 test = constants.NV_NODENETTEST not in nresult
1929 _ErrorIf(test, self.ENODENET, node,
1930 "node hasn't returned node tcp connectivity data")
1932 if nresult[constants.NV_NODENETTEST]:
1933 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1935 _ErrorIf(True, self.ENODENET, node,
1936 "tcp communication with node '%s': %s",
1937 anode, nresult[constants.NV_NODENETTEST][anode])
1939 test = constants.NV_MASTERIP not in nresult
1940 _ErrorIf(test, self.ENODENET, node,
1941 "node hasn't returned node master IP reachability data")
1943 if not nresult[constants.NV_MASTERIP]:
1944 if node == self.master_node:
1945 msg = "the master node cannot reach the master IP (not configured?)"
1947 msg = "cannot reach the master IP"
1948 _ErrorIf(True, self.ENODENET, node, msg)
1950 def _VerifyInstance(self, instance, instanceconfig, node_image,
1952 """Verify an instance.
1954 This function checks whether the required block devices are
1955 available on the instance's nodes.
1958 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1959 node_current = instanceconfig.primary_node
1961 node_vol_should = {}
1962 instanceconfig.MapLVsByNode(node_vol_should)
1964 for node in node_vol_should:
1965 n_img = node_image[node]
1966 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1967 # ignore missing volumes on offline or broken nodes
1969 for volume in node_vol_should[node]:
1970 test = volume not in n_img.volumes
1971 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1972 "volume %s missing on node %s", volume, node)
1974 if instanceconfig.admin_up:
1975 pri_img = node_image[node_current]
1976 test = instance not in pri_img.instances and not pri_img.offline
1977 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1978 "instance not running on its primary node %s",
1981 diskdata = [(nname, success, status, idx)
1982 for (nname, disks) in diskstatus.items()
1983 for idx, (success, status) in enumerate(disks)]
1985 for nname, success, bdev_status, idx in diskdata:
1986 # the 'ghost node' construction in Exec() ensures that we have a
1988 snode = node_image[nname]
1989 bad_snode = snode.ghost or snode.offline
1990 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1991 self.EINSTANCEFAULTYDISK, instance,
1992 "couldn't retrieve status for disk/%s on %s: %s",
1993 idx, nname, bdev_status)
1994 _ErrorIf((instanceconfig.admin_up and success and
1995 bdev_status.ldisk_status == constants.LDS_FAULTY),
1996 self.EINSTANCEFAULTYDISK, instance,
1997 "disk/%s on %s is faulty", idx, nname)
1999 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2000 """Verify if there are any unknown volumes in the cluster.
2002 The .os, .swap and backup volumes are ignored. All other volumes are
2003 reported as unknown.
2005 @type reserved: L{ganeti.utils.FieldSet}
2006 @param reserved: a FieldSet of reserved volume names
2009 for node, n_img in node_image.items():
2010 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2011 # skip non-healthy nodes
2013 for volume in n_img.volumes:
2014 test = ((node not in node_vol_should or
2015 volume not in node_vol_should[node]) and
2016 not reserved.Matches(volume))
2017 self._ErrorIf(test, self.ENODEORPHANLV, node,
2018 "volume %s is unknown", volume)
2020 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2021 """Verify N+1 Memory Resilience.
2023 Check that if one single node dies we can still start all the
2024 instances it was primary for.
2027 cluster_info = self.cfg.GetClusterInfo()
2028 for node, n_img in node_image.items():
2029 # This code checks that every node which is now listed as
2030 # secondary has enough memory to host all instances it is
2031 # supposed to, should a single other node in the cluster fail.
2032 # FIXME: not ready for failover to an arbitrary node
2033 # FIXME: does not support file-backed instances
2034 # WARNING: we currently take into account down instances as well
2035 # as up ones, considering that even if they're down someone
2036 # might want to start them even in the event of a node failure.
2038 # we're skipping offline nodes from the N+1 warning, since
2039 # most likely we don't have good memory information from them;
2040 # we already list instances living on such nodes, and that's
2043 for prinode, instances in n_img.sbp.items():
2045 for instance in instances:
2046 bep = cluster_info.FillBE(instance_cfg[instance])
2047 if bep[constants.BE_AUTO_BALANCE]:
2048 needed_mem += bep[constants.BE_MEMORY]
2049 test = n_img.mfree < needed_mem
2050 self._ErrorIf(test, self.ENODEN1, node,
2051 "not enough memory to accomodate instance failovers"
2052 " should node %s fail (%dMiB needed, %dMiB available)",
2053 prinode, needed_mem, n_img.mfree)
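# Hedged sketch of the N+1 memory arithmetic above: for every primary node
# whose instances this node would have to absorb, sum the backend memory of
# the auto-balanced instances and compare it with the node's free memory.
# All names and sizes below are invented.
def _example_n1_shortfall(sbp, inst_mem, mfree):
  """sbp: {primary: [instances secondary here]}; return primaries we can't absorb."""
  shortfall = {}
  for prinode, instances in sbp.items():
    needed = sum(inst_mem.get(inst, 0) for inst in instances)
    if mfree < needed:
      shortfall[prinode] = needed
  return shortfall

# _example_n1_shortfall({"node2": ["inst1", "inst2"]},
#                       {"inst1": 512, "inst2": 1024}, mfree=1024)
# -> {"node2": 1536}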
2056 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2057 (files_all, files_all_opt, files_mc, files_vm)):
2058 """Verifies file checksums collected from all nodes.
2060 @param errorif: Callback for reporting errors
2061 @param nodeinfo: List of L{objects.Node} objects
2062 @param master_node: Name of master node
2063 @param all_nvinfo: RPC results
2066 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2068 assert master_node in node_names
2069 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2070 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2071 "Found file listed in more than one file list"
2073 # Define functions determining which nodes to consider for a file
2074 file2nodefn = dict([(filename, fn)
2075 for (files, fn) in [(files_all, None),
2076 (files_all_opt, None),
2077 (files_mc, lambda node: (node.master_candidate or
2078 node.name == master_node)),
2079 (files_vm, lambda node: node.vm_capable)]
2080 for filename in files])
2082 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2084 for node in nodeinfo:
2088 nresult = all_nvinfo[node.name]
2090 if nresult.fail_msg or not nresult.payload:
2093 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2095 test = not (node_files and isinstance(node_files, dict))
2096 errorif(test, cls.ENODEFILECHECK, node.name,
2097 "Node did not return file checksum data")
2101 for (filename, checksum) in node_files.items():
2102 # Check if the file should be considered for a node
2103 fn = file2nodefn[filename]
2104 if fn is None or fn(node):
2105 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2107 for (filename, checksums) in fileinfo.items():
2108 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2110 # Nodes having the file
2111 with_file = frozenset(node_name
2112 for nodes in fileinfo[filename].values()
2113 for node_name in nodes)
2115 # Nodes missing file
2116 missing_file = node_names - with_file
2118 if filename in files_all_opt:
2120 errorif(missing_file and missing_file != node_names,
2121 cls.ECLUSTERFILECHECK, None,
2122 "File %s is optional, but it must exist on all or no"
2123 " nodes (not found on %s)",
2124 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2126 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2127 "File %s is missing from node(s) %s", filename,
2128 utils.CommaJoin(utils.NiceSort(missing_file)))
2130 # See if there are multiple versions of the file
2131 test = len(checksums) > 1
2133 variants = ["variant %s on %s" %
2134 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2135 for (idx, (checksum, nodes)) in
2136 enumerate(sorted(checksums.items()))]
2140 errorif(test, cls.ECLUSTERFILECHECK, None,
2141 "File %s found with %s different checksums (%s)",
2142 filename, len(checksums), "; ".join(variants))
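# Illustrative sketch of the checksum bookkeeping used above: per file, group
# node names by the checksum they reported; one group means the file is
# consistent, several groups mean diverging copies, and nodes absent from all
# groups are missing the file. The names here are invented.
def _example_file_report(node_names, reported):
  """reported: {node: checksum}; return (missing_nodes, nodes_by_checksum)."""
  by_checksum = {}
  for node, checksum in reported.items():
    by_checksum.setdefault(checksum, set()).add(node)
  missing = set(node_names) - set(reported)
  return (missing, by_checksum)

# _example_file_report(["n1", "n2", "n3"], {"n1": "abc", "n2": "def"})
# -> (set(["n3"]), {"abc": set(["n1"]), "def": set(["n2"])})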
2144 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2146 """Verifies and the node DRBD status.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nresult: the remote results for the node
2151 @param instanceinfo: the dict of instances
2152 @param drbd_helper: the configured DRBD usermode helper
2153 @param drbd_map: the DRBD map as returned by
2154 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2158 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2161 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2162 test = (helper_result is None)
2163 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2164 "no drbd usermode helper returned")
2166 status, payload = helper_result
2168 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2169 "drbd usermode helper check unsuccessful: %s", payload)
2170 test = status and (payload != drbd_helper)
2171 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2172 "wrong drbd usermode helper: %s", payload)
2174 # compute the DRBD minors
2176 for minor, instance in drbd_map[node].items():
2177 test = instance not in instanceinfo
2178 _ErrorIf(test, self.ECLUSTERCFG, None,
2179 "ghost instance '%s' in temporary DRBD map", instance)
2180 # ghost instance should not be running, but otherwise we
2181 # don't give double warnings (both ghost instance and
2182 # unallocated minor in use)
2184 node_drbd[minor] = (instance, False)
2186 instance = instanceinfo[instance]
2187 node_drbd[minor] = (instance.name, instance.admin_up)
2189 # and now check them
2190 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2191 test = not isinstance(used_minors, (tuple, list))
2192 _ErrorIf(test, self.ENODEDRBD, node,
2193 "cannot parse drbd status file: %s", str(used_minors))
2195 # we cannot check drbd status
2198 for minor, (iname, must_exist) in node_drbd.items():
2199 test = minor not in used_minors and must_exist
2200 _ErrorIf(test, self.ENODEDRBD, node,
2201 "drbd minor %d of instance %s is not active", minor, iname)
2202 for minor in used_minors:
2203 test = minor not in node_drbd
2204 _ErrorIf(test, self.ENODEDRBD, node,
2205 "unallocated drbd minor %d is in use", minor)
2207 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2208 """Builds the node OS structures.
2210 @type ninfo: L{objects.Node}
2211 @param ninfo: the node to check
2212 @param nresult: the remote results for the node
2213 @param nimg: the node image object
2217 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2219 remote_os = nresult.get(constants.NV_OSLIST, None)
2220 test = (not isinstance(remote_os, list) or
2221 not compat.all(isinstance(v, list) and len(v) == 7
2222 for v in remote_os))
2224 _ErrorIf(test, self.ENODEOS, node,
2225 "node hasn't returned valid OS data")
2234 for (name, os_path, status, diagnose,
2235 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2237 if name not in os_dict:
2240 # parameters is a list of lists instead of list of tuples due to
2241 # JSON lacking a real tuple type, fix it:
2242 parameters = [tuple(v) for v in parameters]
2243 os_dict[name].append((os_path, status, diagnose,
2244 set(variants), set(parameters), set(api_ver)))
2246 nimg.oslist = os_dict
2248 def _VerifyNodeOS(self, ninfo, nimg, base):
2249 """Verifies the node OS list.
2251 @type ninfo: L{objects.Node}
2252 @param ninfo: the node to check
2253 @param nimg: the node image object
2254 @param base: the 'template' node we match against (e.g. from the master)
2258 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2260 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2262 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2263 for os_name, os_data in nimg.oslist.items():
2264 assert os_data, "Empty OS status for OS %s?!" % os_name
2265 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2266 _ErrorIf(not f_status, self.ENODEOS, node,
2267 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2268 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2269 "OS '%s' has multiple entries (first one shadows the rest): %s",
2270 os_name, utils.CommaJoin([v[0] for v in os_data]))
2271 # comparisons with the 'base' image
2272 test = os_name not in base.oslist
2273 _ErrorIf(test, self.ENODEOS, node,
2274 "Extra OS %s not present on reference node (%s)",
2278 assert base.oslist[os_name], "Base node has empty OS status?"
2279 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2281 # base OS is invalid, skipping
2283 for kind, a, b in [("API version", f_api, b_api),
2284 ("variants list", f_var, b_var),
2285 ("parameters", beautify_params(f_param),
2286 beautify_params(b_param))]:
2287 _ErrorIf(a != b, self.ENODEOS, node,
2288 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2289 kind, os_name, base.name,
2290 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2292 # check any missing OSes
2293 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2294 _ErrorIf(missing, self.ENODEOS, node,
2295 "OSes present on reference node %s but missing on this node: %s",
2296 base.name, utils.CommaJoin(missing))
2298 def _VerifyOob(self, ninfo, nresult):
2299 """Verifies out of band functionality of a node.
2301 @type ninfo: L{objects.Node}
2302 @param ninfo: the node to check
2303 @param nresult: the remote results for the node
2307 # We just have to verify the paths on master and/or master candidates
2308 # as the oob helper is invoked on the master
2309 if ((ninfo.master_candidate or ninfo.master_capable) and
2310 constants.NV_OOB_PATHS in nresult):
2311 for path_result in nresult[constants.NV_OOB_PATHS]:
2312 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2314 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2315 """Verifies and updates the node volume data.
2317 This function will update a L{NodeImage}'s internal structures
2318 with data from the remote call.
2320 @type ninfo: L{objects.Node}
2321 @param ninfo: the node to check
2322 @param nresult: the remote results for the node
2323 @param nimg: the node image object
2324 @param vg_name: the configured VG name
2328 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2330 nimg.lvm_fail = True
2331 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2334 elif isinstance(lvdata, basestring):
2335 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2336 utils.SafeEncode(lvdata))
2337 elif not isinstance(lvdata, dict):
2338 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2340 nimg.volumes = lvdata
2341 nimg.lvm_fail = False
2343 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2344 """Verifies and updates the node instance list.
2346 If the listing was successful, the method updates this node's instance
2347 list. Otherwise, it marks the RPC call as failed for the instance
2350 @type ninfo: L{objects.Node}
2351 @param ninfo: the node to check
2352 @param nresult: the remote results for the node
2353 @param nimg: the node image object
2356 idata = nresult.get(constants.NV_INSTANCELIST, None)
2357 test = not isinstance(idata, list)
2358 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2359 " (instancelist): %s", utils.SafeEncode(str(idata)))
2361 nimg.hyp_fail = True
2363 nimg.instances = idata
2365 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2366 """Verifies and computes a node information map
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2371 @param nimg: the node image object
2372 @param vg_name: the configured VG name
2376 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2378 # try to read free memory (from the hypervisor)
2379 hv_info = nresult.get(constants.NV_HVINFO, None)
2380 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2381 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2384 nimg.mfree = int(hv_info["memory_free"])
2385 except (ValueError, TypeError):
2386 _ErrorIf(True, self.ENODERPC, node,
2387 "node returned invalid nodeinfo, check hypervisor")
2389 # FIXME: devise a free space model for file based instances as well
2390 if vg_name is not None:
2391 test = (constants.NV_VGLIST not in nresult or
2392 vg_name not in nresult[constants.NV_VGLIST])
2393 _ErrorIf(test, self.ENODELVM, node,
2394 "node didn't return data for the volume group '%s'"
2395 " - it is either missing or broken", vg_name)
2398 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2399 except (ValueError, TypeError):
2400 _ErrorIf(True, self.ENODERPC, node,
2401 "node returned invalid LVM info, check LVM status")
2403 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2404 """Gets per-disk status information for all instances.
2406 @type nodelist: list of strings
2407 @param nodelist: Node names
2408 @type node_image: dict of (name, L{objects.Node})
2409 @param node_image: Node objects
2410 @type instanceinfo: dict of (name, L{objects.Instance})
2411 @param instanceinfo: Instance objects
2412 @rtype: {instance: {node: [(success, payload)]}}
2413 @return: a dictionary of per-instance dictionaries with nodes as
2414 keys and disk information as values; the disk information is a
2415 list of tuples (success, payload)
2418 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2421 node_disks_devonly = {}
2422 diskless_instances = set()
2423 diskless = constants.DT_DISKLESS
2425 for nname in nodelist:
2426 node_instances = list(itertools.chain(node_image[nname].pinst,
2427 node_image[nname].sinst))
2428 diskless_instances.update(inst for inst in node_instances
2429 if instanceinfo[inst].disk_template == diskless)
2430 disks = [(inst, disk)
2431 for inst in node_instances
2432 for disk in instanceinfo[inst].disks]
2435 # No need to collect data
2438 node_disks[nname] = disks
2440 # Creating copies as SetDiskID below will modify the objects and that can
2441 # lead to incorrect data returned from nodes
2442 devonly = [dev.Copy() for (_, dev) in disks]
2445 self.cfg.SetDiskID(dev, nname)
2447 node_disks_devonly[nname] = devonly
2449 assert len(node_disks) == len(node_disks_devonly)
2451 # Collect data from all nodes with disks
2452 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2455 assert len(result) == len(node_disks)
2459 for (nname, nres) in result.items():
2460 disks = node_disks[nname]
2463 # No data from this node
2464 data = len(disks) * [(False, "node offline")]
2467 _ErrorIf(msg, self.ENODERPC, nname,
2468 "while getting disk information: %s", msg)
2470 # No data from this node
2471 data = len(disks) * [(False, msg)]
2474 for idx, i in enumerate(nres.payload):
2475 if isinstance(i, (tuple, list)) and len(i) == 2:
2478 logging.warning("Invalid result from node %s, entry %d: %s",
2480 data.append((False, "Invalid result from the remote node"))
2482 for ((inst, _), status) in zip(disks, data):
2483 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2485 # Add empty entries for diskless instances.
2486 for inst in diskless_instances:
2487 assert inst not in instdisk
2490 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2491 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2492 compat.all(isinstance(s, (tuple, list)) and
2493 len(s) == 2 for s in statuses)
2494 for inst, nnames in instdisk.items()
2495 for nname, statuses in nnames.items())
2496 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
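# Hedged sketch of the result shape asserted above: a nested mapping keyed by
# instance name, then node name, holding one (success, payload) pair per disk.
# The instance names, node names and payloads below are invented placeholders.
_example_instdisk = {
  "inst1": {"node1": [(True, "status-disk0"), (True, "status-disk1")]},
  "inst2": {"node2": [(False, "node offline")]},
}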
2500 def BuildHooksEnv(self):
2503 Cluster-Verify hooks are run only in the post phase; if they fail, their
2504 output is logged in the verify output and the verification fails.
2508 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2511 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2512 for node in self.my_node_info.values())
2516 def BuildHooksNodes(self):
2517 """Build hooks nodes.
2520 return ([], self.my_node_names)
2522 def Exec(self, feedback_fn):
2523 """Verify integrity of the node group, performing various test on nodes.
2526 # This method has too many local variables. pylint: disable-msg=R0914
2528 if not self.my_node_names:
2530 feedback_fn("* Empty node group, skipping verification")
2534 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2535 verbose = self.op.verbose
2536 self._feedback_fn = feedback_fn
2538 vg_name = self.cfg.GetVGName()
2539 drbd_helper = self.cfg.GetDRBDHelper()
2540 cluster = self.cfg.GetClusterInfo()
2541 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2542 hypervisors = cluster.enabled_hypervisors
2543 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2545 i_non_redundant = [] # Non redundant instances
2546 i_non_a_balanced = [] # Non auto-balanced instances
2547 n_offline = 0 # Count of offline nodes
2548 n_drained = 0 # Count of nodes being drained
2549 node_vol_should = {}
2551 # FIXME: verify OS list
2554 filemap = _ComputeAncillaryFiles(cluster, False)
2556 # do local checksums
2557 master_node = self.master_node = self.cfg.GetMasterNode()
2558 master_ip = self.cfg.GetMasterIP()
2560 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2562 # We will make nodes contact all nodes in their group, and one node from
2563 # every other group.
2564 # TODO: should it be a *random* node, different every time?
2565 online_nodes = [node.name for node in node_data_list if not node.offline]
2566 other_group_nodes = {}
2568 for name in sorted(self.all_node_info):
2569 node = self.all_node_info[name]
2570 if (node.group not in other_group_nodes
2571 and node.group != self.group_uuid
2572 and not node.offline):
2573 other_group_nodes[node.group] = node.name
2575 node_verify_param = {
2576 constants.NV_FILELIST:
2577 utils.UniqueSequence(filename
2578 for files in filemap
2579 for filename in files),
2580 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2581 constants.NV_HYPERVISOR: hypervisors,
2582 constants.NV_HVPARAMS:
2583 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2584 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2585 for node in node_data_list
2586 if not node.offline],
2587 constants.NV_INSTANCELIST: hypervisors,
2588 constants.NV_VERSION: None,
2589 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2590 constants.NV_NODESETUP: None,
2591 constants.NV_TIME: None,
2592 constants.NV_MASTERIP: (master_node, master_ip),
2593 constants.NV_OSLIST: None,
2594 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2597 if vg_name is not None:
2598 node_verify_param[constants.NV_VGLIST] = None
2599 node_verify_param[constants.NV_LVLIST] = vg_name
2600 node_verify_param[constants.NV_PVLIST] = [vg_name]
2601 node_verify_param[constants.NV_DRBDLIST] = None
2604 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2607 # FIXME: this needs to be changed per node-group, not cluster-wide
2609 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2610 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2611 bridges.add(default_nicpp[constants.NIC_LINK])
2612 for instance in self.my_inst_info.values():
2613 for nic in instance.nics:
2614 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2615 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2616 bridges.add(full_nic[constants.NIC_LINK])
2619 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2621 # Build our expected cluster state
2622 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2624 vm_capable=node.vm_capable))
2625 for node in node_data_list)
2629 for node in self.all_node_info.values():
2630 path = _SupportsOob(self.cfg, node)
2631 if path and path not in oob_paths:
2632 oob_paths.append(path)
2635 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2637 for instance in self.my_inst_names:
2638 inst_config = self.my_inst_info[instance]
2640 for nname in inst_config.all_nodes:
2641 if nname not in node_image:
2642 gnode = self.NodeImage(name=nname)
2643 gnode.ghost = (nname not in self.all_node_info)
2644 node_image[nname] = gnode
2646 inst_config.MapLVsByNode(node_vol_should)
2648 pnode = inst_config.primary_node
2649 node_image[pnode].pinst.append(instance)
2651 for snode in inst_config.secondary_nodes:
2652 nimg = node_image[snode]
2653 nimg.sinst.append(instance)
2654 if pnode not in nimg.sbp:
2655 nimg.sbp[pnode] = []
2656 nimg.sbp[pnode].append(instance)
2658 # At this point, we have the in-memory data structures complete,
2659 # except for the runtime information, which we'll gather next
2661 # Due to the way our RPC system works, exact response times cannot be
2662 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2663 # time before and after executing the request, we can at least have a time
2665 nvinfo_starttime = time.time()
2666 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2668 self.cfg.GetClusterName())
2669 nvinfo_endtime = time.time()
2671 if self.extra_lv_nodes and vg_name is not None:
2673 self.rpc.call_node_verify(self.extra_lv_nodes,
2674 {constants.NV_LVLIST: vg_name},
2675 self.cfg.GetClusterName())
2677 extra_lv_nvinfo = {}
2679 all_drbd_map = self.cfg.ComputeDRBDMap()
2681 feedback_fn("* Gathering disk information (%s nodes)" %
2682 len(self.my_node_names))
2683 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2686 feedback_fn("* Verifying configuration file consistency")
2688 # If not all nodes are being checked, we need to make sure the master node
2689 # and a non-checked vm_capable node are in the list.
2690 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2692 vf_nvinfo = all_nvinfo.copy()
2693 vf_node_info = list(self.my_node_info.values())
2694 additional_nodes = []
2695 if master_node not in self.my_node_info:
2696 additional_nodes.append(master_node)
2697 vf_node_info.append(self.all_node_info[master_node])
2698 # Add the first vm_capable node we find which is not included
2699 for node in absent_nodes:
2700 nodeinfo = self.all_node_info[node]
2701 if nodeinfo.vm_capable and not nodeinfo.offline:
2702 additional_nodes.append(node)
2703 vf_node_info.append(self.all_node_info[node])
2705 key = constants.NV_FILELIST
2706 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2707 {key: node_verify_param[key]},
2708 self.cfg.GetClusterName()))
2710 vf_nvinfo = all_nvinfo
2711 vf_node_info = self.my_node_info.values()
2713 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2715 feedback_fn("* Verifying node status")
2719 for node_i in node_data_list:
2721 nimg = node_image[node]
2725 feedback_fn("* Skipping offline node %s" % (node,))
2729 if node == master_node:
2731 elif node_i.master_candidate:
2732 ntype = "master candidate"
2733 elif node_i.drained:
2739 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2741 msg = all_nvinfo[node].fail_msg
2742 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2744 nimg.rpc_fail = True
2747 nresult = all_nvinfo[node].payload
2749 nimg.call_ok = self._VerifyNode(node_i, nresult)
2750 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2751 self._VerifyNodeNetwork(node_i, nresult)
2752 self._VerifyOob(node_i, nresult)
2755 self._VerifyNodeLVM(node_i, nresult, vg_name)
2756 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2759 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2760 self._UpdateNodeInstances(node_i, nresult, nimg)
2761 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2762 self._UpdateNodeOS(node_i, nresult, nimg)
2764 if not nimg.os_fail:
2765 if refos_img is None:
2767 self._VerifyNodeOS(node_i, nimg, refos_img)
2768 self._VerifyNodeBridges(node_i, nresult, bridges)
2770 # Check whether all running instances are primary for the node. (This
2771 # can no longer be done from _VerifyInstance below, since some of the
2772 # wrong instances could be from other node groups.)
2773 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2775 for inst in non_primary_inst:
2776 test = inst in self.all_inst_info
2777 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2778 "instance should not run on node %s", node_i.name)
2779 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2780 "node is running unknown instance %s", inst)
2782 for node, result in extra_lv_nvinfo.items():
2783 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2784 node_image[node], vg_name)
2786 feedback_fn("* Verifying instance status")
2787 for instance in self.my_inst_names:
2789 feedback_fn("* Verifying instance %s" % instance)
2790 inst_config = self.my_inst_info[instance]
2791 self._VerifyInstance(instance, inst_config, node_image,
2793 inst_nodes_offline = []
2795 pnode = inst_config.primary_node
2796 pnode_img = node_image[pnode]
2797 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2798 self.ENODERPC, pnode, "instance %s, connection to"
2799 " primary node failed", instance)
2801 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2802 self.EINSTANCEBADNODE, instance,
2803 "instance is marked as running and lives on offline node %s",
2804 inst_config.primary_node)
2806 # If the instance is non-redundant we cannot survive losing its primary
2807 # node, so we are not N+1 compliant. On the other hand we have no disk
2808 # templates with more than one secondary so that situation is not well
2810 # FIXME: does not support file-backed instances
2811 if not inst_config.secondary_nodes:
2812 i_non_redundant.append(instance)
2814 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2815 instance, "instance has multiple secondary nodes: %s",
2816 utils.CommaJoin(inst_config.secondary_nodes),
2817 code=self.ETYPE_WARNING)
2819 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2820 pnode = inst_config.primary_node
2821 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2822 instance_groups = {}
2824 for node in instance_nodes:
2825 instance_groups.setdefault(self.all_node_info[node].group,
2829 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2830 # Sort so that we always list the primary node first.
2831 for group, nodes in sorted(instance_groups.items(),
2832 key=lambda (_, nodes): pnode in nodes,
2835 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2836 instance, "instance has primary and secondary nodes in"
2837 " different groups: %s", utils.CommaJoin(pretty_list),
2838 code=self.ETYPE_WARNING)
2840 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2841 i_non_a_balanced.append(instance)
2843 for snode in inst_config.secondary_nodes:
2844 s_img = node_image[snode]
2845 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2846 "instance %s, connection to secondary node failed", instance)
2849 inst_nodes_offline.append(snode)
2851 # warn that the instance lives on offline nodes
2852 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2853 "instance has offline secondary node(s) %s",
2854 utils.CommaJoin(inst_nodes_offline))
2855 # ... or ghost/non-vm_capable nodes
2856 for node in inst_config.all_nodes:
2857 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2858 "instance lives on ghost node %s", node)
2859 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2860 instance, "instance lives on non-vm_capable node %s", node)
2862 feedback_fn("* Verifying orphan volumes")
2863 reserved = utils.FieldSet(*cluster.reserved_lvs)
2865 # We will get spurious "unknown volume" warnings if any node of this group
2866 # is secondary for an instance whose primary is in another group. To avoid
2867 # them, we find these instances and add their volumes to node_vol_should.
2868 for inst in self.all_inst_info.values():
2869 for secondary in inst.secondary_nodes:
2870 if (secondary in self.my_node_info
2871 and inst.name not in self.my_inst_info):
2872 inst.MapLVsByNode(node_vol_should)
2875 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2877 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2878 feedback_fn("* Verifying N+1 Memory redundancy")
2879 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2881 feedback_fn("* Other Notes")
2883 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2884 % len(i_non_redundant))
2886 if i_non_a_balanced:
2887 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2888 % len(i_non_a_balanced))
2891 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2894 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2898 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2899 """Analyze the post-hooks' result
2901 This method analyses the hook result, handles it, and sends some
2902 nicely-formatted feedback back to the user.
2904 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2905 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2906 @param hooks_results: the results of the multi-node hooks rpc call
2907 @param feedback_fn: function used to send feedback back to the caller
2908 @param lu_result: previous Exec result
2909 @return: the new Exec result, based on the previous result
2913 # We only really run POST phase hooks, only for non-empty groups,
2914 # and are only interested in their results
2915 if not self.my_node_names:
2918 elif phase == constants.HOOKS_PHASE_POST:
2919 # Used to change hooks' output to proper indentation
2920 feedback_fn("* Hooks Results")
2921 assert hooks_results, "invalid result from hooks"
2923 for node_name in hooks_results:
2924 res = hooks_results[node_name]
2926 test = msg and not res.offline
2927 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2928 "Communication failure in hooks execution: %s", msg)
2929 if res.offline or msg:
2930 # No need to investigate payload if node is offline or gave an error.
2931 # manually override lu_result here, as _ErrorIf only
2932 # overrides self.bad
2935 for script, hkr, output in res.payload:
2936 test = hkr == constants.HKR_FAIL
2937 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2938 "Script %s failed, output:", script)
2940 output = self._HOOKS_INDENT_RE.sub(" ", output)
2941 feedback_fn("%s" % output)
2947 class LUClusterVerifyDisks(NoHooksLU):
2948 """Verifies the cluster disks status.
2953 def ExpandNames(self):
2954 self.share_locks = _ShareAll()
2955 self.needed_locks = {
2956 locking.LEVEL_NODEGROUP: locking.ALL_SET,
2959 def Exec(self, feedback_fn):
2960 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
2962 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2963 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2964 for group in group_names])
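# Illustrative sketch (invented group names): the Exec above fans out one
# child job per node group, each holding a single OpGroupVerifyDisks opcode,
# so the processor submits them as separate jobs and returns their IDs.
def _example_per_group_jobs(group_names):
  return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                         for group in group_names])

# _example_per_group_jobs(["default", "storage"]) -> two single-opcode jobs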
2967 class LUGroupVerifyDisks(NoHooksLU):
2968 """Verifies the status of all disks in a node group.
2973 def ExpandNames(self):
2974 # Raises errors.OpPrereqError on its own if group can't be found
2975 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2977 self.share_locks = _ShareAll()
2978 self.needed_locks = {
2979 locking.LEVEL_INSTANCE: [],
2980 locking.LEVEL_NODEGROUP: [],
2981 locking.LEVEL_NODE: [],
2984 def DeclareLocks(self, level):
2985 if level == locking.LEVEL_INSTANCE:
2986 assert not self.needed_locks[locking.LEVEL_INSTANCE]
2988 # Lock instances optimistically, needs verification once node and group
2989 # locks have been acquired
2990 self.needed_locks[locking.LEVEL_INSTANCE] = \
2991 self.cfg.GetNodeGroupInstances(self.group_uuid)
2993 elif level == locking.LEVEL_NODEGROUP:
2994 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2996 self.needed_locks[locking.LEVEL_NODEGROUP] = \
2997 set([self.group_uuid] +
2998 # Lock all groups used by instances optimistically; this requires
2999 # going via the node before it's locked, requiring verification
3002 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3003 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3005 elif level == locking.LEVEL_NODE:
3006 # This will only lock the nodes in the group to be verified which contain
3008 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3009 self._LockInstancesNodes()
3011 # Lock all nodes in group to be verified
3012 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3013 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3014 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3016 def CheckPrereq(self):
3017 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3018 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3019 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3021 assert self.group_uuid in owned_groups
3023 # Check if locked instances are still correct
3024 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3026 # Get instance information
3027 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3029 # Check if node groups for locked instances are still correct
3030 for (instance_name, inst) in self.instances.items():
3031 assert owned_nodes.issuperset(inst.all_nodes), \
3032 "Instance %s's nodes changed while we kept the lock" % instance_name
3034 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3037 assert self.group_uuid in inst_groups, \
3038 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3040 def Exec(self, feedback_fn):
3041 """Verify integrity of cluster disks.
3043 @rtype: tuple of three items
3044 @return: a tuple of (dict of node-to-node_error, list of instances
3045 which need activate-disks, dict of instance: (node, volume) for
3050 res_instances = set()
3053 nv_dict = _MapInstanceDisksToNodes([inst
3054 for inst in self.instances.values()
3058 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3059 set(self.cfg.GetVmCapableNodeList()))
3061 node_lvs = self.rpc.call_lv_list(nodes, [])
3063 for (node, node_res) in node_lvs.items():
3064 if node_res.offline:
3067 msg = node_res.fail_msg
3069 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3070 res_nodes[node] = msg
3073 for lv_name, (_, _, lv_online) in node_res.payload.items():
3074 inst = nv_dict.pop((node, lv_name), None)
3075 if not (lv_online or inst is None):
3076 res_instances.add(inst)
3078 # any leftover items in nv_dict are missing LVs, let's arrange the data
3080 for key, inst in nv_dict.iteritems():
3081 res_missing.setdefault(inst, []).append(key)
3083 return (res_nodes, list(res_instances), res_missing)
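# Hypothetical sketch of the per-node LV matching performed above: LVs that
# are reported but offline point at instances needing activate-disks, and any
# (node, lv) pair left in nv_dict afterwards is a missing LV.
def _example_match_lvs(nv_dict, node, payload):
  """nv_dict: {(node, lv_name): instance}; payload: {lv_name: (size, _, online)}."""
  needs_activation = set()
  for lv_name, (_, _, lv_online) in payload.items():
    inst = nv_dict.pop((node, lv_name), None)
    if inst is not None and not lv_online:
      needs_activation.add(inst)
  return needs_activation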
3086 class LUClusterRepairDiskSizes(NoHooksLU):
3087 """Verifies the cluster disks sizes.
3092 def ExpandNames(self):
3093 if self.op.instances:
3094 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3095 self.needed_locks = {
3096 locking.LEVEL_NODE: [],
3097 locking.LEVEL_INSTANCE: self.wanted_names,
3099 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3101 self.wanted_names = None
3102 self.needed_locks = {
3103 locking.LEVEL_NODE: locking.ALL_SET,
3104 locking.LEVEL_INSTANCE: locking.ALL_SET,
3106 self.share_locks = _ShareAll()
3108 def DeclareLocks(self, level):
3109 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3110 self._LockInstancesNodes(primary_only=True)
3112 def CheckPrereq(self):
3113 """Check prerequisites.
3115 This only checks the optional instance list against the existing names.
3118 if self.wanted_names is None:
3119 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3121 self.wanted_instances = \
3122 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3124 def _EnsureChildSizes(self, disk):
3125 """Ensure children of the disk have the needed disk size.
3127 This is valid mainly for DRBD8 and fixes an issue where the
3128 children have a smaller disk size.
3130 @param disk: an L{ganeti.objects.Disk} object
3133 if disk.dev_type == constants.LD_DRBD8:
3134 assert disk.children, "Empty children for DRBD8?"
3135 fchild = disk.children[0]
3136 mismatch = fchild.size < disk.size
3138 self.LogInfo("Child disk has size %d, parent %d, fixing",
3139 fchild.size, disk.size)
3140 fchild.size = disk.size
3142 # and we recurse on this child only, not on the metadev
3143 return self._EnsureChildSizes(fchild) or mismatch
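# Minimal sketch (hypothetical sizes, plain integers instead of Disk objects)
# of the rule enforced above: a DRBD8 parent whose data child is smaller has
# the child grown to the parent's size, and the caller is told to save the
# configuration.
def _example_fix_child_size(parent_size, child_size):
  """Return (new_child_size, needs_config_update)."""
  if child_size < parent_size:
    return (parent_size, True)
  return (child_size, False)

# _example_fix_child_size(10240, 10112) -> (10240, True)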
3147 def Exec(self, feedback_fn):
3148 """Verify the size of cluster disks.
3151 # TODO: check child disks too
3152 # TODO: check differences in size between primary/secondary nodes
3154 for instance in self.wanted_instances:
3155 pnode = instance.primary_node
3156 if pnode not in per_node_disks:
3157 per_node_disks[pnode] = []
3158 for idx, disk in enumerate(instance.disks):
3159 per_node_disks[pnode].append((instance, idx, disk))
3162 for node, dskl in per_node_disks.items():
3163 newl = [v[2].Copy() for v in dskl]
3165 self.cfg.SetDiskID(dsk, node)
3166 result = self.rpc.call_blockdev_getsize(node, newl)
3168 self.LogWarning("Failure in blockdev_getsize call to node"
3169 " %s, ignoring", node)
3171 if len(result.payload) != len(dskl):
3172 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3173 " result.payload=%s", node, len(dskl), result.payload)
3174 self.LogWarning("Invalid result from node %s, ignoring node results",
3177 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3179 self.LogWarning("Disk %d of instance %s did not return size"
3180 " information, ignoring", idx, instance.name)
3182 if not isinstance(size, (int, long)):
3183 self.LogWarning("Disk %d of instance %s did not return valid"
3184 " size information, ignoring", idx, instance.name)
3187 if size != disk.size:
3188 self.LogInfo("Disk %d of instance %s has mismatched size,"
3189 " correcting: recorded %d, actual %d", idx,
3190 instance.name, disk.size, size)
3192 self.cfg.Update(instance, feedback_fn)
3193 changed.append((instance.name, idx, size))
3194 if self._EnsureChildSizes(disk):
3195 self.cfg.Update(instance, feedback_fn)
3196 changed.append((instance.name, idx, disk.size))
3200 class LUClusterRename(LogicalUnit):
3201 """Rename the cluster.
3204 HPATH = "cluster-rename"
3205 HTYPE = constants.HTYPE_CLUSTER
3207 def BuildHooksEnv(self):
3212 "OP_TARGET": self.cfg.GetClusterName(),
3213 "NEW_NAME": self.op.name,
3216 def BuildHooksNodes(self):
3217 """Build hooks nodes.
3220 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3222 def CheckPrereq(self):
3223 """Verify that the passed name is a valid one.
3226 hostname = netutils.GetHostname(name=self.op.name,
3227 family=self.cfg.GetPrimaryIPFamily())
3229 new_name = hostname.name
3230 self.ip = new_ip = hostname.ip
3231 old_name = self.cfg.GetClusterName()
3232 old_ip = self.cfg.GetMasterIP()
3233 if new_name == old_name and new_ip == old_ip:
3234 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3235 " cluster has changed",
3237 if new_ip != old_ip:
3238 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3239 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3240 " reachable on the network" %
3241 new_ip, errors.ECODE_NOTUNIQUE)
3243 self.op.name = new_name
3245 def Exec(self, feedback_fn):
3246 """Rename the cluster.
3249 clustername = self.op.name
3252 # shutdown the master IP
3253 master = self.cfg.GetMasterNode()
3254 result = self.rpc.call_node_stop_master(master, False)
3255 result.Raise("Could not disable the master role")
3258 cluster = self.cfg.GetClusterInfo()
3259 cluster.cluster_name = clustername
3260 cluster.master_ip = ip
3261 self.cfg.Update(cluster, feedback_fn)
3263 # update the known hosts file
3264 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3265 node_list = self.cfg.GetOnlineNodeList()
3267 node_list.remove(master)
3270 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3272 result = self.rpc.call_node_start_master(master, False, False)
3273 msg = result.fail_msg
3275 self.LogWarning("Could not re-enable the master role on"
3276 " the master, please restart manually: %s", msg)
3281 class LUClusterSetParams(LogicalUnit):
3282 """Change the parameters of the cluster.
3285 HPATH = "cluster-modify"
3286 HTYPE = constants.HTYPE_CLUSTER
3289 def CheckArguments(self):
3293 if self.op.uid_pool:
3294 uidpool.CheckUidPool(self.op.uid_pool)
3296 if self.op.add_uids:
3297 uidpool.CheckUidPool(self.op.add_uids)
3299 if self.op.remove_uids:
3300 uidpool.CheckUidPool(self.op.remove_uids)
3302 def ExpandNames(self):
3303 # FIXME: in the future maybe other cluster params won't require checking on
3304 # all nodes to be modified.
3305 self.needed_locks = {
3306 locking.LEVEL_NODE: locking.ALL_SET,
3308 self.share_locks[locking.LEVEL_NODE] = 1
3310 def BuildHooksEnv(self):
3315 "OP_TARGET": self.cfg.GetClusterName(),
3316 "NEW_VG_NAME": self.op.vg_name,
3319 def BuildHooksNodes(self):
3320 """Build hooks nodes.
3323 mn = self.cfg.GetMasterNode()
3326 def CheckPrereq(self):
3327 """Check prerequisites.
3329 This checks that the given parameters don't conflict and
3330 that the given volume group is valid.
3333 if self.op.vg_name is not None and not self.op.vg_name:
3334 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3335 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3336 " instances exist", errors.ECODE_INVAL)
3338 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3339 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3340 raise errors.OpPrereqError("Cannot disable drbd helper while"
3341 " drbd-based instances exist",
3344 node_list = self.owned_locks(locking.LEVEL_NODE)
3346 # if vg_name not None, checks given volume group on all nodes
3348 vglist = self.rpc.call_vg_list(node_list)
3349 for node in node_list:
3350 msg = vglist[node].fail_msg
3352 # ignoring down node
3353 self.LogWarning("Error while gathering data on node %s"
3354 " (ignoring node): %s", node, msg)
3356 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3358 constants.MIN_VG_SIZE)
3360 raise errors.OpPrereqError("Error on node '%s': %s" %
3361 (node, vgstatus), errors.ECODE_ENVIRON)
3363 if self.op.drbd_helper:
3364 # checks given drbd helper on all nodes
3365 helpers = self.rpc.call_drbd_helper(node_list)
3366 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3368 self.LogInfo("Not checking drbd helper on offline node %s", node)
3370 msg = helpers[node].fail_msg
3372 raise errors.OpPrereqError("Error checking drbd helper on node"
3373 " '%s': %s" % (node, msg),
3374 errors.ECODE_ENVIRON)
3375 node_helper = helpers[node].payload
3376 if node_helper != self.op.drbd_helper:
3377 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3378 (node, node_helper), errors.ECODE_ENVIRON)
3380 self.cluster = cluster = self.cfg.GetClusterInfo()
3381 # validate params changes
3382 if self.op.beparams:
3383 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3384 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3386 if self.op.ndparams:
3387 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3388 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3390 # TODO: we need a more general way to handle resetting
3391 # cluster-level parameters to default values
3392 if self.new_ndparams["oob_program"] == "":
3393 self.new_ndparams["oob_program"] = \
3394 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3396 if self.op.nicparams:
3397 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3398 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3399 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3402 # check all instances for consistency
3403 for instance in self.cfg.GetAllInstancesInfo().values():
3404 for nic_idx, nic in enumerate(instance.nics):
3405 params_copy = copy.deepcopy(nic.nicparams)
3406 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3408 # check parameter syntax
3410 objects.NIC.CheckParameterSyntax(params_filled)
3411 except errors.ConfigurationError, err:
3412 nic_errors.append("Instance %s, nic/%d: %s" %
3413 (instance.name, nic_idx, err))
3415 # if we're moving instances to routed, check that they have an ip
3416 target_mode = params_filled[constants.NIC_MODE]
3417 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3418 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3419 " address" % (instance.name, nic_idx))
3421 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3422 "\n".join(nic_errors))
3424 # hypervisor list/parameters
3425 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3426 if self.op.hvparams:
3427 for hv_name, hv_dict in self.op.hvparams.items():
3428 if hv_name not in self.new_hvparams:
3429 self.new_hvparams[hv_name] = hv_dict
3431 self.new_hvparams[hv_name].update(hv_dict)
3433 # os hypervisor parameters
3434 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3436 for os_name, hvs in self.op.os_hvp.items():
3437 if os_name not in self.new_os_hvp:
3438 self.new_os_hvp[os_name] = hvs
3440 for hv_name, hv_dict in hvs.items():
3441 if hv_name not in self.new_os_hvp[os_name]:
3442 self.new_os_hvp[os_name][hv_name] = hv_dict
3444 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3447 self.new_osp = objects.FillDict(cluster.osparams, {})
3448 if self.op.osparams:
3449 for os_name, osp in self.op.osparams.items():
3450 if os_name not in self.new_osp:
3451 self.new_osp[os_name] = {}
3453 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3456 if not self.new_osp[os_name]:
3457 # we removed all parameters
3458 del self.new_osp[os_name]
3460 # check the parameter validity (remote check)
3461 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3462 os_name, self.new_osp[os_name])
3464 # changes to the hypervisor list
3465 if self.op.enabled_hypervisors is not None:
3466 self.hv_list = self.op.enabled_hypervisors
3467 for hv in self.hv_list:
3468 # if the hypervisor doesn't already exist in the cluster
3469 # hvparams, we initialize it to empty, and then (in both
3470 # cases) we make sure to fill the defaults, as we might not
3471 # have a complete defaults list if the hypervisor wasn't
3473 if hv not in new_hvp:
3475 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3476 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3478 self.hv_list = cluster.enabled_hypervisors
3480 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3481 # either the enabled list has changed, or the parameters have, validate
3482 for hv_name, hv_params in self.new_hvparams.items():
3483 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3484 (self.op.enabled_hypervisors and
3485 hv_name in self.op.enabled_hypervisors)):
3486 # either this is a new hypervisor, or its parameters have changed
3487 hv_class = hypervisor.GetHypervisor(hv_name)
3488 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3489 hv_class.CheckParameterSyntax(hv_params)
3490 _CheckHVParams(self, node_list, hv_name, hv_params)
3493 # no need to check any newly-enabled hypervisors, since the
3494 # defaults have already been checked in the above code-block
3495 for os_name, os_hvp in self.new_os_hvp.items():
3496 for hv_name, hv_params in os_hvp.items():
3497 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3498 # we need to fill in the new os_hvp on top of the actual hv_p
3499 cluster_defaults = self.new_hvparams.get(hv_name, {})
3500 new_osp = objects.FillDict(cluster_defaults, hv_params)
3501 hv_class = hypervisor.GetHypervisor(hv_name)
3502 hv_class.CheckParameterSyntax(new_osp)
3503 _CheckHVParams(self, node_list, hv_name, new_osp)
3505 if self.op.default_iallocator:
3506 alloc_script = utils.FindFile(self.op.default_iallocator,
3507 constants.IALLOCATOR_SEARCH_PATH,
3509 if alloc_script is None:
3510 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3511 " specified" % self.op.default_iallocator,
3514 def Exec(self, feedback_fn):
3515 """Change the parameters of the cluster.
3518 if self.op.vg_name is not None:
3519 new_volume = self.op.vg_name
3522 if new_volume != self.cfg.GetVGName():
3523 self.cfg.SetVGName(new_volume)
3525 feedback_fn("Cluster LVM configuration already in desired"
3526 " state, not changing")
3527 if self.op.drbd_helper is not None:
3528 new_helper = self.op.drbd_helper
3531 if new_helper != self.cfg.GetDRBDHelper():
3532 self.cfg.SetDRBDHelper(new_helper)
3534 feedback_fn("Cluster DRBD helper already in desired state,"
3536 if self.op.hvparams:
3537 self.cluster.hvparams = self.new_hvparams
3539 self.cluster.os_hvp = self.new_os_hvp
3540 if self.op.enabled_hypervisors is not None:
3541 self.cluster.hvparams = self.new_hvparams
3542 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3543 if self.op.beparams:
3544 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3545 if self.op.nicparams:
3546 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3547 if self.op.osparams:
3548 self.cluster.osparams = self.new_osp
3549 if self.op.ndparams:
3550 self.cluster.ndparams = self.new_ndparams
3552 if self.op.candidate_pool_size is not None:
3553 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3554 # we need to update the pool size here, otherwise the save will fail
3555 _AdjustCandidatePool(self, [])
3557 if self.op.maintain_node_health is not None:
3558 self.cluster.maintain_node_health = self.op.maintain_node_health
3560 if self.op.prealloc_wipe_disks is not None:
3561 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3563 if self.op.add_uids is not None:
3564 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3566 if self.op.remove_uids is not None:
3567 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3569 if self.op.uid_pool is not None:
3570 self.cluster.uid_pool = self.op.uid_pool
3572 if self.op.default_iallocator is not None:
3573 self.cluster.default_iallocator = self.op.default_iallocator
3575 if self.op.reserved_lvs is not None:
3576 self.cluster.reserved_lvs = self.op.reserved_lvs
3578 def helper_os(aname, mods, desc):
3580 lst = getattr(self.cluster, aname)
3581 for key, val in mods:
3582 if key == constants.DDM_ADD:
3584 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3587 elif key == constants.DDM_REMOVE:
3591 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3593 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3595 if self.op.hidden_os:
3596 helper_os("hidden_os", self.op.hidden_os, "hidden")
3598 if self.op.blacklisted_os:
3599 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3601 if self.op.master_netdev:
3602 master = self.cfg.GetMasterNode()
3603 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3604 self.cluster.master_netdev)
3605 result = self.rpc.call_node_stop_master(master, False)
3606 result.Raise("Could not disable the master ip")
3607 feedback_fn("Changing master_netdev from %s to %s" %
3608 (self.cluster.master_netdev, self.op.master_netdev))
3609 self.cluster.master_netdev = self.op.master_netdev
3611 self.cfg.Update(self.cluster, feedback_fn)
3613 if self.op.master_netdev:
3614 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3615 self.op.master_netdev)
3616 result = self.rpc.call_node_start_master(master, False, False)
3618 self.LogWarning("Could not re-enable the master ip on"
3619 " the master, please restart manually: %s",
3623 def _UploadHelper(lu, nodes, fname):
3624 """Helper for uploading a file and showing warnings.
3627 if os.path.exists(fname):
3628 result = lu.rpc.call_upload_file(nodes, fname)
3629 for to_node, to_result in result.items():
3630 msg = to_result.fail_msg
3632 msg = ("Copy of file %s to node %s failed: %s" %
3633 (fname, to_node, msg))
3634 lu.proc.LogWarning(msg)
3637 def _ComputeAncillaryFiles(cluster, redist):
3638 """Compute files external to Ganeti which need to be consistent.
3640 @type redist: boolean
3641 @param redist: Whether to include files which need to be redistributed
3644 # Compute files for all nodes
3646 constants.SSH_KNOWN_HOSTS_FILE,
3647 constants.CONFD_HMAC_KEY,
3648 constants.CLUSTER_DOMAIN_SECRET_FILE,
3652 files_all.update(constants.ALL_CERT_FILES)
3653 files_all.update(ssconf.SimpleStore().GetFileList())
3655 if cluster.modify_etc_hosts:
3656 files_all.add(constants.ETC_HOSTS)
3658 # Files which must either exist on all nodes or on none
3659 files_all_opt = set([
3660 constants.RAPI_USERS_FILE,
3663 # Files which should only be on master candidates
3666 files_mc.add(constants.CLUSTER_CONF_FILE)
3668 # Files which should only be on VM-capable nodes
3669 files_vm = set(filename
3670 for hv_name in cluster.enabled_hypervisors
3671 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3673 # Filenames must be unique
3674 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3675 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3676 "Found file listed in more than one file list"
3678 return (files_all, files_all_opt, files_mc, files_vm)
3681 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3682 """Distribute additional files which are part of the cluster configuration.
3684 ConfigWriter takes care of distributing the config and ssconf files, but
3685 there are more files which should be distributed to all nodes. This function
3686 makes sure those are copied.
3688 @param lu: calling logical unit
3689 @param additional_nodes: list of nodes not in the config to distribute to
3690 @type additional_vm: boolean
3691 @param additional_vm: whether the additional nodes are vm-capable or not
3694 # Gather target nodes
3695 cluster = lu.cfg.GetClusterInfo()
3696 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3698 online_nodes = lu.cfg.GetOnlineNodeList()
3699 vm_nodes = lu.cfg.GetVmCapableNodeList()
3701 if additional_nodes is not None:
3702 online_nodes.extend(additional_nodes)
3704 vm_nodes.extend(additional_nodes)
3706 # Never distribute to master node
3707 for nodelist in [online_nodes, vm_nodes]:
3708 if master_info.name in nodelist:
3709 nodelist.remove(master_info.name)
3712 (files_all, files_all_opt, files_mc, files_vm) = \
3713 _ComputeAncillaryFiles(cluster, True)
3715 # Never re-distribute configuration file from here
3716 assert not (constants.CLUSTER_CONF_FILE in files_all or
3717 constants.CLUSTER_CONF_FILE in files_vm)
3718 assert not files_mc, "Master candidates not handled in this function"
3721 (online_nodes, files_all),
3722 (online_nodes, files_all_opt),
3723 (vm_nodes, files_vm),
3727 for (node_list, files) in filemap:
3729 _UploadHelper(lu, node_list, fname)
3732 class LUClusterRedistConf(NoHooksLU):
3733 """Force the redistribution of cluster configuration.
3735 This is a very simple LU.
3740 def ExpandNames(self):
3741 self.needed_locks = {
3742 locking.LEVEL_NODE: locking.ALL_SET,
3744 self.share_locks[locking.LEVEL_NODE] = 1
3746 def Exec(self, feedback_fn):
3747 """Redistribute the configuration.
3750 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3751 _RedistributeAncillaryFiles(self)
3754 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3755 """Sleep and poll for an instance's disk to sync.
3758 if not instance.disks or disks is not None and not disks:
3761 disks = _ExpandCheckDisks(instance, disks)
3764 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3766 node = instance.primary_node
3769 lu.cfg.SetDiskID(dev, node)
3771 # TODO: Convert to utils.Retry
3774 degr_retries = 10 # in seconds, as we sleep 1 second each time
3778 cumul_degraded = False
3779 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3780 msg = rstats.fail_msg
3782 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3785 raise errors.RemoteError("Can't contact node %s for mirror data,"
3786 " aborting." % node)
3789 rstats = rstats.payload
3791 for i, mstat in enumerate(rstats):
3793 lu.LogWarning("Can't compute data for node %s/%s",
3794 node, disks[i].iv_name)
3797 cumul_degraded = (cumul_degraded or
3798 (mstat.is_degraded and mstat.sync_percent is None))
3799 if mstat.sync_percent is not None:
3801 if mstat.estimated_time is not None:
3802 rem_time = ("%s remaining (estimated)" %
3803 utils.FormatSeconds(mstat.estimated_time))
3804 max_time = mstat.estimated_time
3806 rem_time = "no time estimate"
3807 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3808 (disks[i].iv_name, mstat.sync_percent, rem_time))
3810 # if we're done but degraded, let's do a few small retries, to
3811 # make sure we see a stable and not transient situation; therefore
3812 # we force restart of the loop
3813 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3814 logging.info("Degraded disks found, %d retries left", degr_retries)
3822 time.sleep(min(60, max_time))
3825 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3826 return not cumul_degraded
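# Illustrative caller pattern (a sketch, not taken verbatim from this file):
# LUs that create or resync disks typically do something like
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disk sync did not complete cleanly")
# where the error message is hypothetical; only the call shape matters.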
3829 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3830 """Check that mirrors are not degraded.
3832 The ldisk parameter, if True, will change the test from the
3833 is_degraded attribute (which represents overall non-ok status for
3834 the device(s)) to the ldisk (representing the local storage status).
3837 lu.cfg.SetDiskID(dev, node)
3841 if on_primary or dev.AssembleOnSecondary():
3842 rstats = lu.rpc.call_blockdev_find(node, dev)
3843 msg = rstats.fail_msg
3845 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3847 elif not rstats.payload:
3848 lu.LogWarning("Can't find disk on node %s", node)
3852 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3854 result = result and not rstats.payload.is_degraded
3857 for child in dev.children:
3858 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3863 class LUOobCommand(NoHooksLU):
3864 """Logical unit for OOB handling.
3868 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3870 def ExpandNames(self):
3871 """Gather locks we need.
3874 if self.op.node_names:
3875 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3876 lock_names = self.op.node_names
3878 lock_names = locking.ALL_SET
3880 self.needed_locks = {
3881 locking.LEVEL_NODE: lock_names,
3884 def CheckPrereq(self):
3885 """Check prerequisites.
3888 - the node exists in the configuration
3891 Any errors are signaled by raising errors.OpPrereqError.
3895 self.master_node = self.cfg.GetMasterNode()
3897 assert self.op.power_delay >= 0.0
3899 if self.op.node_names:
3900 if (self.op.command in self._SKIP_MASTER and
3901 self.master_node in self.op.node_names):
3902 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3903 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3905 if master_oob_handler:
3906 additional_text = ("run '%s %s %s' if you want to operate on the"
3907 " master regardless") % (master_oob_handler,
3911 additional_text = "it does not support out-of-band operations"
3913 raise errors.OpPrereqError(("Operating on the master node %s is not"
3914 " allowed for %s; %s") %
3915 (self.master_node, self.op.command,
3916 additional_text), errors.ECODE_INVAL)
3918 self.op.node_names = self.cfg.GetNodeList()
3919 if self.op.command in self._SKIP_MASTER:
3920 self.op.node_names.remove(self.master_node)
3922 if self.op.command in self._SKIP_MASTER:
3923 assert self.master_node not in self.op.node_names
3925 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3927 raise errors.OpPrereqError("Node %s not found" % node_name,
3930 self.nodes.append(node)
3932 if (not self.op.ignore_status and
3933 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3934 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3935 " not marked offline") % node_name,
3938 def Exec(self, feedback_fn):
3939 """Execute OOB and return result if we expect any.
3942 master_node = self.master_node
3945 for idx, node in enumerate(utils.NiceSort(self.nodes,
3946 key=lambda node: node.name)):
3947 node_entry = [(constants.RS_NORMAL, node.name)]
3948 ret.append(node_entry)
3950 oob_program = _SupportsOob(self.cfg, node)
3953 node_entry.append((constants.RS_UNAVAIL, None))
3956 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3957 self.op.command, oob_program, node.name)
3958 result = self.rpc.call_run_oob(master_node, oob_program,
3959 self.op.command, node.name,
3963 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3964 node.name, result.fail_msg)
3965 node_entry.append((constants.RS_NODATA, None))
3968 self._CheckPayload(result)
3969 except errors.OpExecError, err:
3970 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3972 node_entry.append((constants.RS_NODATA, None))
3974 if self.op.command == constants.OOB_HEALTH:
3975 # For health we should log important events
3976 for item, status in result.payload:
3977 if status in [constants.OOB_STATUS_WARNING,
3978 constants.OOB_STATUS_CRITICAL]:
3979 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3980 item, node.name, status)
3982 if self.op.command == constants.OOB_POWER_ON:
3984 elif self.op.command == constants.OOB_POWER_OFF:
3985 node.powered = False
3986 elif self.op.command == constants.OOB_POWER_STATUS:
3987 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3988 if powered != node.powered:
3989 logging.warning(("Recorded power state (%s) of node '%s' does not"
3990 " match actual power state (%s)"), node.powered,
3993 # For configuration changing commands we should update the node
3994 if self.op.command in (constants.OOB_POWER_ON,
3995 constants.OOB_POWER_OFF):
3996 self.cfg.Update(node, feedback_fn)
3998 node_entry.append((constants.RS_NORMAL, result.payload))
4000 if (self.op.command == constants.OOB_POWER_ON and
4001 idx < len(self.nodes) - 1):
4002 time.sleep(self.op.power_delay)
4006 def _CheckPayload(self, result):
4007 """Checks if the payload is valid.
4009 @param result: RPC result
4010 @raises errors.OpExecError: If payload is not valid
4014 if self.op.command == constants.OOB_HEALTH:
4015 if not isinstance(result.payload, list):
4016 errs.append("command 'health' is expected to return a list but got %s" %
4017 type(result.payload))
4019 for item, status in result.payload:
4020 if status not in constants.OOB_STATUSES:
4021 errs.append("health item '%s' has invalid status '%s'" %
4024 if self.op.command == constants.OOB_POWER_STATUS:
4025 if not isinstance(result.payload, dict):
4026 errs.append("power-status is expected to return a dict but got %s" %
4027 type(result.payload))
4029 if self.op.command in [
4030 constants.OOB_POWER_ON,
4031 constants.OOB_POWER_OFF,
4032 constants.OOB_POWER_CYCLE,
4034 if result.payload is not None:
4035 errs.append("%s is expected to not return payload but got '%s'" %
4036 (self.op.command, result.payload))
4039 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4040 utils.CommaJoin(errs))
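# For reference, the payload shapes accepted by LUOobCommand._CheckPayload
# above (derived from its checks; the concrete values are only examples):
#   health:              [("PSU0", constants.OOB_STATUS_WARNING), ...]
#   power-status:        {constants.OOB_POWER_STATUS_POWERED: True}
#   power-on/off/cycle:  None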
4042 class _OsQuery(_QueryBase):
4043 FIELDS = query.OS_FIELDS
4045 def ExpandNames(self, lu):
4046 # Lock all nodes in shared mode
4047 # Temporary removal of locks, should be reverted later
4048 # TODO: reintroduce locks when they are lighter-weight
4049 lu.needed_locks = {}
4050 #self.share_locks[locking.LEVEL_NODE] = 1
4051 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4053 # The following variables interact with _QueryBase._GetNames
4055 self.wanted = self.names
4057 self.wanted = locking.ALL_SET
4059 self.do_locking = self.use_locking
4061 def DeclareLocks(self, lu, level):
4065 def _DiagnoseByOS(rlist):
4066 """Remaps a per-node return list into an a per-os per-node dictionary
4068 @param rlist: a map with node names as keys and OS objects as values
4071 @return: a dictionary with osnames as keys and as value another
4072 map, with nodes as keys and tuples of (path, status, diagnose,
4073 variants, parameters, api_versions) as values, eg::
4075 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4076 (/srv/..., False, "invalid api")],
4077 "node2": [(/srv/..., True, "", [], [])]}
4082 # we build here the list of nodes that didn't fail the RPC (at RPC
4083 # level), so that nodes with a non-responding node daemon don't
4084 # make all OSes invalid
4085 good_nodes = [node_name for node_name in rlist
4086 if not rlist[node_name].fail_msg]
4087 for node_name, nr in rlist.items():
4088 if nr.fail_msg or not nr.payload:
4090 for (name, path, status, diagnose, variants,
4091 params, api_versions) in nr.payload:
4092 if name not in all_os:
4093 # build a list of nodes for this os containing empty lists
4094 # for each node in node_list
4096 for nname in good_nodes:
4097 all_os[name][nname] = []
4098 # convert params from [name, help] to (name, help)
4099 params = [tuple(v) for v in params]
4100 all_os[name][node_name].append((path, status, diagnose,
4101 variants, params, api_versions))
4104 def _GetQueryData(self, lu):
4105 """Computes the list of nodes and their attributes.
4108 # Locking is not used
4109 assert not (compat.any(lu.glm.is_owned(level)
4110 for level in locking.LEVELS
4111 if level != locking.LEVEL_CLUSTER) or
4112 self.do_locking or self.use_locking)
4114 valid_nodes = [node.name
4115 for node in lu.cfg.GetAllNodesInfo().values()
4116 if not node.offline and node.vm_capable]
4117 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4118 cluster = lu.cfg.GetClusterInfo()
4122 for (os_name, os_data) in pol.items():
4123 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4124 hidden=(os_name in cluster.hidden_os),
4125 blacklisted=(os_name in cluster.blacklisted_os))
4129 api_versions = set()
4131 for idx, osl in enumerate(os_data.values()):
4132 info.valid = bool(info.valid and osl and osl[0][1])
4136 (node_variants, node_params, node_api) = osl[0][3:6]
4139 variants.update(node_variants)
4140 parameters.update(node_params)
4141 api_versions.update(node_api)
4143 # Filter out inconsistent values
4144 variants.intersection_update(node_variants)
4145 parameters.intersection_update(node_params)
4146 api_versions.intersection_update(node_api)
4148 info.variants = list(variants)
4149 info.parameters = list(parameters)
4150 info.api_versions = list(api_versions)
4152 data[os_name] = info
4154 # Prepare data in requested order
4155 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4159 class LUOsDiagnose(NoHooksLU):
4160 """Logical unit for OS diagnose/query.
4166 def _BuildFilter(fields, names):
4167 """Builds a filter for querying OSes.
4170 name_filter = qlang.MakeSimpleFilter("name", names)
4172 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4173 # respective field is not requested
4174 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4175 for fname in ["hidden", "blacklisted"]
4176 if fname not in fields]
4177 if "valid" not in fields:
4178 status_filter.append([qlang.OP_TRUE, "valid"])
4181 status_filter.insert(0, qlang.OP_AND)
4183 status_filter = None
4185 if name_filter and status_filter:
4186 return [qlang.OP_AND, name_filter, status_filter]
4190 return status_filter
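# Worked example, derived from the logic above: for fields=["name", "variants"]
# and names=["debian-etch"], none of "hidden", "blacklisted" or "valid" is
# requested, so the status filter becomes
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# and the final result is [qlang.OP_AND, <name filter>, <status filter>].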
4192 def CheckArguments(self):
4193 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4194 self.op.output_fields, False)
4196 def ExpandNames(self):
4197 self.oq.ExpandNames(self)
4199 def Exec(self, feedback_fn):
4200 return self.oq.OldStyleQuery(self)
4203 class LUNodeRemove(LogicalUnit):
4204 """Logical unit for removing a node.
4207 HPATH = "node-remove"
4208 HTYPE = constants.HTYPE_NODE
4210 def BuildHooksEnv(self):
4213 This doesn't run on the target node in the pre phase as a failed
4214 node would then be impossible to remove.
4218 "OP_TARGET": self.op.node_name,
4219 "NODE_NAME": self.op.node_name,
4222 def BuildHooksNodes(self):
4223 """Build hooks nodes.
4226 all_nodes = self.cfg.GetNodeList()
4228 all_nodes.remove(self.op.node_name)
4230 logging.warning("Node '%s', which is about to be removed, was not found"
4231 " in the list of all nodes", self.op.node_name)
4232 return (all_nodes, all_nodes)
4234 def CheckPrereq(self):
4235 """Check prerequisites.
4238 - the node exists in the configuration
4239 - it does not have primary or secondary instances
4240 - it's not the master
4242 Any errors are signaled by raising errors.OpPrereqError.
4245 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4246 node = self.cfg.GetNodeInfo(self.op.node_name)
4247 assert node is not None
4249 masternode = self.cfg.GetMasterNode()
4250 if node.name == masternode:
4251 raise errors.OpPrereqError("Node is the master node, failover to another"
4252 " node is required", errors.ECODE_INVAL)
4254 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4255 if node.name in instance.all_nodes:
4256 raise errors.OpPrereqError("Instance %s is still running on the node,"
4257 " please remove first" % instance_name,
4259 self.op.node_name = node.name
4262 def Exec(self, feedback_fn):
4263 """Removes the node from the cluster.
4267 logging.info("Stopping the node daemon and removing configs from node %s",
4270 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4272 # Promote nodes to master candidate as needed
4273 _AdjustCandidatePool(self, exceptions=[node.name])
4274 self.context.RemoveNode(node.name)
4276 # Run post hooks on the node before it's removed
4277 _RunPostHook(self, node.name)
4279 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4280 msg = result.fail_msg
4282 self.LogWarning("Errors encountered on the remote node while leaving"
4283 " the cluster: %s", msg)
4285 # Remove node from our /etc/hosts
4286 if self.cfg.GetClusterInfo().modify_etc_hosts:
4287 master_node = self.cfg.GetMasterNode()
4288 result = self.rpc.call_etc_hosts_modify(master_node,
4289 constants.ETC_HOSTS_REMOVE,
4291 result.Raise("Can't update hosts file with new host data")
4292 _RedistributeAncillaryFiles(self)
4295 class _NodeQuery(_QueryBase):
4296 FIELDS = query.NODE_FIELDS
4298 def ExpandNames(self, lu):
4299 lu.needed_locks = {}
4300 lu.share_locks[locking.LEVEL_NODE] = 1
4303 self.wanted = _GetWantedNodes(lu, self.names)
4305 self.wanted = locking.ALL_SET
4307 self.do_locking = (self.use_locking and
4308 query.NQ_LIVE in self.requested_data)
4311 # if we don't request only static fields, we need to lock the nodes
4312 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4314 def DeclareLocks(self, lu, level):
4317 def _GetQueryData(self, lu):
4318 """Computes the list of nodes and their attributes.
4321 all_info = lu.cfg.GetAllNodesInfo()
4323 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4325 # Gather data as requested
4326 if query.NQ_LIVE in self.requested_data:
4327 # filter out non-vm_capable nodes
4328 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4330 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4331 lu.cfg.GetHypervisorType())
4332 live_data = dict((name, nresult.payload)
4333 for (name, nresult) in node_data.items()
4334 if not nresult.fail_msg and nresult.payload)
4338 if query.NQ_INST in self.requested_data:
4339 node_to_primary = dict([(name, set()) for name in nodenames])
4340 node_to_secondary = dict([(name, set()) for name in nodenames])
4342 inst_data = lu.cfg.GetAllInstancesInfo()
4344 for inst in inst_data.values():
4345 if inst.primary_node in node_to_primary:
4346 node_to_primary[inst.primary_node].add(inst.name)
4347 for secnode in inst.secondary_nodes:
4348 if secnode in node_to_secondary:
4349 node_to_secondary[secnode].add(inst.name)
4351 node_to_primary = None
4352 node_to_secondary = None
4354 if query.NQ_OOB in self.requested_data:
4355 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4356 for name, node in all_info.iteritems())
4360 if query.NQ_GROUP in self.requested_data:
4361 groups = lu.cfg.GetAllNodeGroupsInfo()
4365 return query.NodeQueryData([all_info[name] for name in nodenames],
4366 live_data, lu.cfg.GetMasterNode(),
4367 node_to_primary, node_to_secondary, groups,
4368 oob_support, lu.cfg.GetClusterInfo())
4371 class LUNodeQuery(NoHooksLU):
4372 """Logical unit for querying nodes.
4375 # pylint: disable-msg=W0142
4378 def CheckArguments(self):
4379 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4380 self.op.output_fields, self.op.use_locking)
4382 def ExpandNames(self):
4383 self.nq.ExpandNames(self)
4385 def Exec(self, feedback_fn):
4386 return self.nq.OldStyleQuery(self)
4389 class LUNodeQueryvols(NoHooksLU):
4390 """Logical unit for getting volumes on node(s).
4394 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4395 _FIELDS_STATIC = utils.FieldSet("node")
4397 def CheckArguments(self):
4398 _CheckOutputFields(static=self._FIELDS_STATIC,
4399 dynamic=self._FIELDS_DYNAMIC,
4400 selected=self.op.output_fields)
4402 def ExpandNames(self):
4403 self.needed_locks = {}
4404 self.share_locks[locking.LEVEL_NODE] = 1
4405 if not self.op.nodes:
4406 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4408 self.needed_locks[locking.LEVEL_NODE] = \
4409 _GetWantedNodes(self, self.op.nodes)
4411 def Exec(self, feedback_fn):
4412 """Computes the list of nodes and their attributes.
4415 nodenames = self.owned_locks(locking.LEVEL_NODE)
4416 volumes = self.rpc.call_node_volumes(nodenames)
4418 ilist = self.cfg.GetAllInstancesInfo()
4419 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4422 for node in nodenames:
4423 nresult = volumes[node]
4426 msg = nresult.fail_msg
4428 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4431 node_vols = sorted(nresult.payload,
4432 key=operator.itemgetter("dev"))
4434 for vol in node_vols:
4436 for field in self.op.output_fields:
4439 elif field == "phys":
4443 elif field == "name":
4445 elif field == "size":
4446 val = int(float(vol["size"]))
4447 elif field == "instance":
4448 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4450 raise errors.ParameterError(field)
4451 node_output.append(str(val))
4453 output.append(node_output)
4458 class LUNodeQueryStorage(NoHooksLU):
4459 """Logical unit for getting information on storage units on node(s).
4462 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4465 def CheckArguments(self):
4466 _CheckOutputFields(static=self._FIELDS_STATIC,
4467 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4468 selected=self.op.output_fields)
4470 def ExpandNames(self):
4471 self.needed_locks = {}
4472 self.share_locks[locking.LEVEL_NODE] = 1
4475 self.needed_locks[locking.LEVEL_NODE] = \
4476 _GetWantedNodes(self, self.op.nodes)
4478 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4480 def Exec(self, feedback_fn):
4481 """Computes the list of nodes and their attributes.
4484 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4486 # Always get name to sort by
4487 if constants.SF_NAME in self.op.output_fields:
4488 fields = self.op.output_fields[:]
4490 fields = [constants.SF_NAME] + self.op.output_fields
4492 # Never ask for node or type as it's only known to the LU
4493 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4494 while extra in fields:
4495 fields.remove(extra)
4497 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4498 name_idx = field_idx[constants.SF_NAME]
4500 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4501 data = self.rpc.call_storage_list(self.nodes,
4502 self.op.storage_type, st_args,
4503 self.op.name, fields)
4507 for node in utils.NiceSort(self.nodes):
4508 nresult = data[node]
4512 msg = nresult.fail_msg
4514 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4517 rows = dict([(row[name_idx], row) for row in nresult.payload])
4519 for name in utils.NiceSort(rows.keys()):
4524 for field in self.op.output_fields:
4525 if field == constants.SF_NODE:
4527 elif field == constants.SF_TYPE:
4528 val = self.op.storage_type
4529 elif field in field_idx:
4530 val = row[field_idx[field]]
4532 raise errors.ParameterError(field)
4541 class _InstanceQuery(_QueryBase):
4542 FIELDS = query.INSTANCE_FIELDS
4544 def ExpandNames(self, lu):
4545 lu.needed_locks = {}
4546 lu.share_locks = _ShareAll()
4549 self.wanted = _GetWantedInstances(lu, self.names)
4551 self.wanted = locking.ALL_SET
4553 self.do_locking = (self.use_locking and
4554 query.IQ_LIVE in self.requested_data)
4556 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4557 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4558 lu.needed_locks[locking.LEVEL_NODE] = []
4559 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4561 self.do_grouplocks = (self.do_locking and
4562 query.IQ_NODES in self.requested_data)
4564 def DeclareLocks(self, lu, level):
4566 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4567 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4569 # Lock all groups used by instances optimistically; this requires going
4570 # via the node before it's locked, requiring verification later on
4571 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4573 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4574 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4575 elif level == locking.LEVEL_NODE:
4576 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4579 def _CheckGroupLocks(lu):
4580 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4581 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4583 # Check if node groups for locked instances are still correct
4584 for instance_name in owned_instances:
4585 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4587 def _GetQueryData(self, lu):
4588 """Computes the list of instances and their attributes.
4591 if self.do_grouplocks:
4592 self._CheckGroupLocks(lu)
4594 cluster = lu.cfg.GetClusterInfo()
4595 all_info = lu.cfg.GetAllInstancesInfo()
4597 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4599 instance_list = [all_info[name] for name in instance_names]
4600 nodes = frozenset(itertools.chain(*(inst.all_nodes
4601 for inst in instance_list)))
4602 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4605 wrongnode_inst = set()
4607 # Gather data as requested
4608 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4610 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4612 result = node_data[name]
4614 # offline nodes will be in both lists
4615 assert result.fail_msg
4616 offline_nodes.append(name)
4618 bad_nodes.append(name)
4619 elif result.payload:
4620 for inst in result.payload:
4621 if inst in all_info:
4622 if all_info[inst].primary_node == name:
4623 live_data.update(result.payload)
4625 wrongnode_inst.add(inst)
4627 # orphan instance; we don't list it here as we don't
4628 # handle this case yet in the output of instance listing
4629 logging.warning("Orphan instance '%s' found on node %s",
4631 # else no instance is alive
4635 if query.IQ_DISKUSAGE in self.requested_data:
4636 disk_usage = dict((inst.name,
4637 _ComputeDiskSize(inst.disk_template,
4638 [{constants.IDISK_SIZE: disk.size}
4639 for disk in inst.disks]))
4640 for inst in instance_list)
4644 if query.IQ_CONSOLE in self.requested_data:
4646 for inst in instance_list:
4647 if inst.name in live_data:
4648 # Instance is running
4649 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4651 consinfo[inst.name] = None
4652 assert set(consinfo.keys()) == set(instance_names)
4656 if query.IQ_NODES in self.requested_data:
4657 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4659 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4660 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4661 for uuid in set(map(operator.attrgetter("group"),
4667 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4668 disk_usage, offline_nodes, bad_nodes,
4669 live_data, wrongnode_inst, consinfo,
4673 class LUQuery(NoHooksLU):
4674 """Query for resources/items of a certain kind.
4677 # pylint: disable-msg=W0142
4680 def CheckArguments(self):
4681 qcls = _GetQueryImplementation(self.op.what)
4683 self.impl = qcls(self.op.filter, self.op.fields, False)
4685 def ExpandNames(self):
4686 self.impl.ExpandNames(self)
4688 def DeclareLocks(self, level):
4689 self.impl.DeclareLocks(self, level)
4691 def Exec(self, feedback_fn):
4692 return self.impl.NewStyleQuery(self)
4695 class LUQueryFields(NoHooksLU):
4696 """Query for resources/items of a certain kind.
4699 # pylint: disable-msg=W0142
4702 def CheckArguments(self):
4703 self.qcls = _GetQueryImplementation(self.op.what)
4705 def ExpandNames(self):
4706 self.needed_locks = {}
4708 def Exec(self, feedback_fn):
4709 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4712 class LUNodeModifyStorage(NoHooksLU):
4713 """Logical unit for modifying a storage volume on a node.
4718 def CheckArguments(self):
4719 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4721 storage_type = self.op.storage_type
4724 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4726 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4727 " modified" % storage_type,
4730 diff = set(self.op.changes.keys()) - modifiable
4732 raise errors.OpPrereqError("The following fields can not be modified for"
4733 " storage units of type '%s': %r" %
4734 (storage_type, list(diff)),
4737 def ExpandNames(self):
4738 self.needed_locks = {
4739 locking.LEVEL_NODE: self.op.node_name,
4742 def Exec(self, feedback_fn):
4743 """Computes the list of nodes and their attributes.
4746 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4747 result = self.rpc.call_storage_modify(self.op.node_name,
4748 self.op.storage_type, st_args,
4749 self.op.name, self.op.changes)
4750 result.Raise("Failed to modify storage unit '%s' on %s" %
4751 (self.op.name, self.op.node_name))
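# Illustrative opcode for this LU (a sketch; it assumes LVM PV storage
# exposes a modifiable "allocatable" field, and the node/device names are
# made up):
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sdb1",
#                               changes={constants.SF_ALLOCATABLE: False})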
4754 class LUNodeAdd(LogicalUnit):
4755 """Logical unit for adding node to the cluster.
4759 HTYPE = constants.HTYPE_NODE
4760 _NFLAGS = ["master_capable", "vm_capable"]
4762 def CheckArguments(self):
4763 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4764 # validate/normalize the node name
4765 self.hostname = netutils.GetHostname(name=self.op.node_name,
4766 family=self.primary_ip_family)
4767 self.op.node_name = self.hostname.name
4769 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4770 raise errors.OpPrereqError("Cannot readd the master node",
4773 if self.op.readd and self.op.group:
4774 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4775 " being readded", errors.ECODE_INVAL)
4777 def BuildHooksEnv(self):
4780 This will run on all nodes before, and on all nodes + the new node after.
4784 "OP_TARGET": self.op.node_name,
4785 "NODE_NAME": self.op.node_name,
4786 "NODE_PIP": self.op.primary_ip,
4787 "NODE_SIP": self.op.secondary_ip,
4788 "MASTER_CAPABLE": str(self.op.master_capable),
4789 "VM_CAPABLE": str(self.op.vm_capable),
4792 def BuildHooksNodes(self):
4793 """Build hooks nodes.
4796 # Exclude added node
4797 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4798 post_nodes = pre_nodes + [self.op.node_name, ]
4800 return (pre_nodes, post_nodes)
4802 def CheckPrereq(self):
4803 """Check prerequisites.
4806 - the new node is not already in the config
4808 - its parameters (single/dual homed) matches the cluster
4810 Any errors are signaled by raising errors.OpPrereqError.
4814 hostname = self.hostname
4815 node = hostname.name
4816 primary_ip = self.op.primary_ip = hostname.ip
4817 if self.op.secondary_ip is None:
4818 if self.primary_ip_family == netutils.IP6Address.family:
4819 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4820 " IPv4 address must be given as secondary",
4822 self.op.secondary_ip = primary_ip
4824 secondary_ip = self.op.secondary_ip
4825 if not netutils.IP4Address.IsValid(secondary_ip):
4826 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4827 " address" % secondary_ip, errors.ECODE_INVAL)
4829 node_list = cfg.GetNodeList()
4830 if not self.op.readd and node in node_list:
4831 raise errors.OpPrereqError("Node %s is already in the configuration" %
4832 node, errors.ECODE_EXISTS)
4833 elif self.op.readd and node not in node_list:
4834 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4837 self.changed_primary_ip = False
4839 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4840 if self.op.readd and node == existing_node_name:
4841 if existing_node.secondary_ip != secondary_ip:
4842 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4843 " address configuration as before",
4845 if existing_node.primary_ip != primary_ip:
4846 self.changed_primary_ip = True
4850 if (existing_node.primary_ip == primary_ip or
4851 existing_node.secondary_ip == primary_ip or
4852 existing_node.primary_ip == secondary_ip or
4853 existing_node.secondary_ip == secondary_ip):
4854 raise errors.OpPrereqError("New node ip address(es) conflict with"
4855 " existing node %s" % existing_node.name,
4856 errors.ECODE_NOTUNIQUE)
4858 # After this 'if' block, None is no longer a valid value for the
4859 # _capable op attributes
4861 old_node = self.cfg.GetNodeInfo(node)
4862 assert old_node is not None, "Can't retrieve locked node %s" % node
4863 for attr in self._NFLAGS:
4864 if getattr(self.op, attr) is None:
4865 setattr(self.op, attr, getattr(old_node, attr))
4867 for attr in self._NFLAGS:
4868 if getattr(self.op, attr) is None:
4869 setattr(self.op, attr, True)
4871 if self.op.readd and not self.op.vm_capable:
4872 pri, sec = cfg.GetNodeInstances(node)
4874 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4875 " flag set to false, but it already holds"
4876 " instances" % node,
4879 # check that the type of the node (single versus dual homed) is the
4880 # same as for the master
4881 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4882 master_singlehomed = myself.secondary_ip == myself.primary_ip
4883 newbie_singlehomed = secondary_ip == primary_ip
4884 if master_singlehomed != newbie_singlehomed:
4885 if master_singlehomed:
4886 raise errors.OpPrereqError("The master has no secondary ip but the"
4887 " new node has one",
4890 raise errors.OpPrereqError("The master has a secondary ip but the"
4891 " new node doesn't have one",
4894 # checks reachability
4895 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4896 raise errors.OpPrereqError("Node not reachable by ping",
4897 errors.ECODE_ENVIRON)
4899 if not newbie_singlehomed:
4900 # check reachability from my secondary ip to newbie's secondary ip
4901 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4902 source=myself.secondary_ip):
4903 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4904 " based ping to node daemon port",
4905 errors.ECODE_ENVIRON)
4912 if self.op.master_capable:
4913 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4915 self.master_candidate = False
4918 self.new_node = old_node
4920 node_group = cfg.LookupNodeGroup(self.op.group)
4921 self.new_node = objects.Node(name=node,
4922 primary_ip=primary_ip,
4923 secondary_ip=secondary_ip,
4924 master_candidate=self.master_candidate,
4925 offline=False, drained=False,
4928 if self.op.ndparams:
4929 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4931 def Exec(self, feedback_fn):
4932 """Adds the new node to the cluster.
4935 new_node = self.new_node
4936 node = new_node.name
4938 # We are adding a new node, so we assume it's powered
4939 new_node.powered = True
4941 # for re-adds, reset the offline/drained/master-candidate flags;
4942 # we need to reset here, otherwise offline would prevent RPC calls
4943 # later in the procedure; this also means that if the re-add
4944 # fails, we are left with a non-offlined, broken node
4946 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4947 self.LogInfo("Readding a node, the offline/drained flags were reset")
4948 # if we demote the node, we do cleanup later in the procedure
4949 new_node.master_candidate = self.master_candidate
4950 if self.changed_primary_ip:
4951 new_node.primary_ip = self.op.primary_ip
4953 # copy the master/vm_capable flags
4954 for attr in self._NFLAGS:
4955 setattr(new_node, attr, getattr(self.op, attr))
4957 # notify the user about any possible mc promotion
4958 if new_node.master_candidate:
4959 self.LogInfo("Node will be a master candidate")
4961 if self.op.ndparams:
4962 new_node.ndparams = self.op.ndparams
4964 new_node.ndparams = {}
4966 # check connectivity
4967 result = self.rpc.call_version([node])[node]
4968 result.Raise("Can't get version information from node %s" % node)
4969 if constants.PROTOCOL_VERSION == result.payload:
4970 logging.info("Communication to node %s fine, sw version %s match",
4971 node, result.payload)
4973 raise errors.OpExecError("Version mismatch master version %s,"
4974 " node version %s" %
4975 (constants.PROTOCOL_VERSION, result.payload))
4977 # Add node to our /etc/hosts, and add key to known_hosts
4978 if self.cfg.GetClusterInfo().modify_etc_hosts:
4979 master_node = self.cfg.GetMasterNode()
4980 result = self.rpc.call_etc_hosts_modify(master_node,
4981 constants.ETC_HOSTS_ADD,
4984 result.Raise("Can't update hosts file with new host data")
4986 if new_node.secondary_ip != new_node.primary_ip:
4987 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4990 node_verify_list = [self.cfg.GetMasterNode()]
4991 node_verify_param = {
4992 constants.NV_NODELIST: [node],
4993 # TODO: do a node-net-test as well?
4996 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4997 self.cfg.GetClusterName())
4998 for verifier in node_verify_list:
4999 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5000 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5002 for failed in nl_payload:
5003 feedback_fn("ssh/hostname verification failed"
5004 " (checking from %s): %s" %
5005 (verifier, nl_payload[failed]))
5006 raise errors.OpExecError("ssh/hostname verification failed")
5009 _RedistributeAncillaryFiles(self)
5010 self.context.ReaddNode(new_node)
5011 # make sure we redistribute the config
5012 self.cfg.Update(new_node, feedback_fn)
5013 # and make sure the new node will not have old files around
5014 if not new_node.master_candidate:
5015 result = self.rpc.call_node_demote_from_mc(new_node.name)
5016 msg = result.fail_msg
5018 self.LogWarning("Node failed to demote itself from master"
5019 " candidate status: %s" % msg)
5021 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5022 additional_vm=self.op.vm_capable)
5023 self.context.AddNode(new_node, self.proc.GetECId())
5026 class LUNodeSetParams(LogicalUnit):
5027 """Modifies the parameters of a node.
5029 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5030 to the node role (as _ROLE_*)
5031 @cvar _R2F: a dictionary from node role to tuples of flags
5032 @cvar _FLAGS: a list of attribute names corresponding to the flags
5035 HPATH = "node-modify"
5036 HTYPE = constants.HTYPE_NODE
5038 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5040 (True, False, False): _ROLE_CANDIDATE,
5041 (False, True, False): _ROLE_DRAINED,
5042 (False, False, True): _ROLE_OFFLINE,
5043 (False, False, False): _ROLE_REGULAR,
5045 _R2F = dict((v, k) for k, v in _F2R.items())
5046 _FLAGS = ["master_candidate", "drained", "offline"]
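# Worked example of the mappings above: the flag tuple follows the order of
# _FLAGS, so _F2R[(True, False, False)] is _ROLE_CANDIDATE and
# _R2F[_ROLE_OFFLINE] gives back (False, False, True).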
5048 def CheckArguments(self):
5049 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5050 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5051 self.op.master_capable, self.op.vm_capable,
5052 self.op.secondary_ip, self.op.ndparams]
5053 if all_mods.count(None) == len(all_mods):
5054 raise errors.OpPrereqError("Please pass at least one modification",
5056 if all_mods.count(True) > 1:
5057 raise errors.OpPrereqError("Can't set the node into more than one"
5058 " state at the same time",
5061 # Boolean value that tells us whether we might be demoting from MC
5062 self.might_demote = (self.op.master_candidate == False or
5063 self.op.offline == True or
5064 self.op.drained == True or
5065 self.op.master_capable == False)
5067 if self.op.secondary_ip:
5068 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5069 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5070 " address" % self.op.secondary_ip,
5073 self.lock_all = self.op.auto_promote and self.might_demote
5074 self.lock_instances = self.op.secondary_ip is not None
5076 def ExpandNames(self):
5078 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5080 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5082 if self.lock_instances:
5083 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5085 def DeclareLocks(self, level):
5086 # If we have locked all instances, release the ones living on nodes
5087 # unrelated to the current operation before waiting to lock nodes.
5088 if level == locking.LEVEL_NODE and self.lock_instances:
5089 self.affected_instances = []
5090 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5093 # Build list of instances to release
5094 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5095 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5096 if (instance.disk_template in constants.DTS_INT_MIRROR and
5097 self.op.node_name in instance.all_nodes):
5098 instances_keep.append(instance_name)
5099 self.affected_instances.append(instance)
5101 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5103 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5104 set(instances_keep))
5106 def BuildHooksEnv(self):
5109 This runs on the master node.
5113 "OP_TARGET": self.op.node_name,
5114 "MASTER_CANDIDATE": str(self.op.master_candidate),
5115 "OFFLINE": str(self.op.offline),
5116 "DRAINED": str(self.op.drained),
5117 "MASTER_CAPABLE": str(self.op.master_capable),
5118 "VM_CAPABLE": str(self.op.vm_capable),
5121 def BuildHooksNodes(self):
5122 """Build hooks nodes.
5125 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5128 def CheckPrereq(self):
5129 """Check prerequisites.
5131 This only checks the instance list against the existing names.
5134 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5136 if (self.op.master_candidate is not None or
5137 self.op.drained is not None or
5138 self.op.offline is not None):
5139 # we can't change the master's node flags
5140 if self.op.node_name == self.cfg.GetMasterNode():
5141 raise errors.OpPrereqError("The master role can be changed"
5142 " only via master-failover",
5145 if self.op.master_candidate and not node.master_capable:
5146 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5147 " it a master candidate" % node.name,
5150 if self.op.vm_capable == False:
5151 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5153 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5154 " the vm_capable flag" % node.name,
5157 if node.master_candidate and self.might_demote and not self.lock_all:
5158 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5159 # check if after removing the current node, we're missing master candidates
5161 (mc_remaining, mc_should, _) = \
5162 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5163 if mc_remaining < mc_should:
5164 raise errors.OpPrereqError("Not enough master candidates, please"
5165 " pass auto promote option to allow"
5166 " promotion", errors.ECODE_STATE)
5168 self.old_flags = old_flags = (node.master_candidate,
5169 node.drained, node.offline)
5170 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5171 self.old_role = old_role = self._F2R[old_flags]
5173 # Check for ineffective changes
5174 for attr in self._FLAGS:
5175 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5176 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5177 setattr(self.op, attr, None)
5179 # Past this point, any flag change to False means a transition
5180 # away from the respective state, as only real changes are kept
5182 # TODO: We might query the real power state if it supports OOB
5183 if _SupportsOob(self.cfg, node):
5184 if self.op.offline is False and not (node.powered or
5185 self.op.powered == True):
5186 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5187 " offline status can be reset") %
5189 elif self.op.powered is not None:
5190 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5191 " as it does not support out-of-band"
5192 " handling") % self.op.node_name)
5194 # If we're being deofflined/drained, we'll MC ourself if needed
5195 if (self.op.drained == False or self.op.offline == False or
5196 (self.op.master_capable and not node.master_capable)):
5197 if _DecideSelfPromotion(self):
5198 self.op.master_candidate = True
5199 self.LogInfo("Auto-promoting node to master candidate")
5201 # If we're no longer master capable, we'll demote ourselves from MC
5202 if self.op.master_capable == False and node.master_candidate:
5203 self.LogInfo("Demoting from master candidate")
5204 self.op.master_candidate = False
5207 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5208 if self.op.master_candidate:
5209 new_role = self._ROLE_CANDIDATE
5210 elif self.op.drained:
5211 new_role = self._ROLE_DRAINED
5212 elif self.op.offline:
5213 new_role = self._ROLE_OFFLINE
5214 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5215 # False is still in the new flags, which means we're un-setting (the old) state
5217 new_role = self._ROLE_REGULAR
5218 else: # no new flags, nothing, keep old role
5221 self.new_role = new_role
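# Examples of the decision above: requesting drained=True on a regular node
# yields _ROLE_DRAINED; requesting offline=False on an offline node falls
# through to _ROLE_REGULAR (un-setting the old state); with no flag changes
# at all the old role is kept.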
5223 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5224 # Trying to transition out of offline status
5225 result = self.rpc.call_version([node.name])[node.name]
5227 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5228 " to report its version: %s" %
5229 (node.name, result.fail_msg),
5232 self.LogWarning("Transitioning node from offline to online state"
5233 " without using re-add. Please make sure the node"
5236 if self.op.secondary_ip:
5237 # Ok even without locking, because this can't be changed by any LU
5238 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5239 master_singlehomed = master.secondary_ip == master.primary_ip
5240 if master_singlehomed and self.op.secondary_ip:
5241 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5242 " homed cluster", errors.ECODE_INVAL)
5245 if self.affected_instances:
5246 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5247 " node has instances (%s) configured"
5248 " to use it" % self.affected_instances)
5250 # On online nodes, check that no instances are running, and that
5251 # the node has the new ip and we can reach it.
5252 for instance in self.affected_instances:
5253 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5255 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5256 if master.name != node.name:
5257 # check reachability from master secondary ip to new secondary ip
5258 if not netutils.TcpPing(self.op.secondary_ip,
5259 constants.DEFAULT_NODED_PORT,
5260 source=master.secondary_ip):
5261 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5262 " based ping to node daemon port",
5263 errors.ECODE_ENVIRON)
5265 if self.op.ndparams:
5266 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5267 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5268 self.new_ndparams = new_ndparams
5270 def Exec(self, feedback_fn):
5275 old_role = self.old_role
5276 new_role = self.new_role
5280 if self.op.ndparams:
5281 node.ndparams = self.new_ndparams
5283 if self.op.powered is not None:
5284 node.powered = self.op.powered
5286 for attr in ["master_capable", "vm_capable"]:
5287 val = getattr(self.op, attr)
5289 setattr(node, attr, val)
5290 result.append((attr, str(val)))
5292 if new_role != old_role:
5293 # Tell the node to demote itself, if no longer MC and not offline
5294 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5295 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5297 self.LogWarning("Node failed to demote itself: %s", msg)
5299 new_flags = self._R2F[new_role]
5300 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5302 result.append((desc, str(nf)))
5303 (node.master_candidate, node.drained, node.offline) = new_flags
5305 # we locked all nodes, we adjust the CP before updating this node
5307 _AdjustCandidatePool(self, [node.name])
5309 if self.op.secondary_ip:
5310 node.secondary_ip = self.op.secondary_ip
5311 result.append(("secondary_ip", self.op.secondary_ip))
5313 # this will trigger configuration file update, if needed
5314 self.cfg.Update(node, feedback_fn)
5316 # this will trigger job queue propagation or cleanup if the mc status changed
5318 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5319 self.context.ReaddNode(node)
5324 class LUNodePowercycle(NoHooksLU):
5325 """Powercycles a node.
5330 def CheckArguments(self):
5331 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5332 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5333 raise errors.OpPrereqError("The node is the master and the force"
5334 " parameter was not set",
5337 def ExpandNames(self):
5338 """Locking for PowercycleNode.
5340 This is a last-resort option and shouldn't block on other
5341 jobs. Therefore, we grab no locks.
5344 self.needed_locks = {}
5346 def Exec(self, feedback_fn):
5350 result = self.rpc.call_node_powercycle(self.op.node_name,
5351 self.cfg.GetHypervisorType())
5352 result.Raise("Failed to schedule the reboot")
5353 return result.payload
5356 class LUClusterQuery(NoHooksLU):
5357 """Query cluster configuration.
5362 def ExpandNames(self):
5363 self.needed_locks = {}
5365 def Exec(self, feedback_fn):
5366 """Return cluster config.
5369 cluster = self.cfg.GetClusterInfo()
5372 # Filter just for enabled hypervisors
5373 for os_name, hv_dict in cluster.os_hvp.items():
5374 os_hvp[os_name] = {}
5375 for hv_name, hv_params in hv_dict.items():
5376 if hv_name in cluster.enabled_hypervisors:
5377 os_hvp[os_name][hv_name] = hv_params
5379 # Convert ip_family to ip_version
5380 primary_ip_version = constants.IP4_VERSION
5381 if cluster.primary_ip_family == netutils.IP6Address.family:
5382 primary_ip_version = constants.IP6_VERSION
5385 "software_version": constants.RELEASE_VERSION,
5386 "protocol_version": constants.PROTOCOL_VERSION,
5387 "config_version": constants.CONFIG_VERSION,
5388 "os_api_version": max(constants.OS_API_VERSIONS),
5389 "export_version": constants.EXPORT_VERSION,
5390 "architecture": (platform.architecture()[0], platform.machine()),
5391 "name": cluster.cluster_name,
5392 "master": cluster.master_node,
5393 "default_hypervisor": cluster.enabled_hypervisors[0],
5394 "enabled_hypervisors": cluster.enabled_hypervisors,
5395 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5396 for hypervisor_name in cluster.enabled_hypervisors]),
5398 "beparams": cluster.beparams,
5399 "osparams": cluster.osparams,
5400 "nicparams": cluster.nicparams,
5401 "ndparams": cluster.ndparams,
5402 "candidate_pool_size": cluster.candidate_pool_size,
5403 "master_netdev": cluster.master_netdev,
5404 "volume_group_name": cluster.volume_group_name,
5405 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5406 "file_storage_dir": cluster.file_storage_dir,
5407 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5408 "maintain_node_health": cluster.maintain_node_health,
5409 "ctime": cluster.ctime,
5410 "mtime": cluster.mtime,
5411 "uuid": cluster.uuid,
5412 "tags": list(cluster.GetTags()),
5413 "uid_pool": cluster.uid_pool,
5414 "default_iallocator": cluster.default_iallocator,
5415 "reserved_lvs": cluster.reserved_lvs,
5416 "primary_ip_version": primary_ip_version,
5417 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5418 "hidden_os": cluster.hidden_os,
5419 "blacklisted_os": cluster.blacklisted_os,
5425 class LUClusterConfigQuery(NoHooksLU):
5426 """Return configuration values.
5430 _FIELDS_DYNAMIC = utils.FieldSet()
5431 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5432 "watcher_pause", "volume_group_name")
5434 def CheckArguments(self):
5435 _CheckOutputFields(static=self._FIELDS_STATIC,
5436 dynamic=self._FIELDS_DYNAMIC,
5437 selected=self.op.output_fields)
5439 def ExpandNames(self):
5440 self.needed_locks = {}
5442 def Exec(self, feedback_fn):
5443 """Dump a representation of the cluster config to the standard output.
5447 for field in self.op.output_fields:
5448 if field == "cluster_name":
5449 entry = self.cfg.GetClusterName()
5450 elif field == "master_node":
5451 entry = self.cfg.GetMasterNode()
5452 elif field == "drain_flag":
5453 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5454 elif field == "watcher_pause":
5455 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5456 elif field == "volume_group_name":
5457 entry = self.cfg.GetVGName()
5459 raise errors.ParameterError(field)
5460 values.append(entry)
5464 class LUInstanceActivateDisks(NoHooksLU):
5465 """Bring up an instance's disks.
5470 def ExpandNames(self):
5471 self._ExpandAndLockInstance()
5472 self.needed_locks[locking.LEVEL_NODE] = []
5473 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5475 def DeclareLocks(self, level):
5476 if level == locking.LEVEL_NODE:
5477 self._LockInstancesNodes()
5479 def CheckPrereq(self):
5480 """Check prerequisites.
5482 This checks that the instance is in the cluster.
5485 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5486 assert self.instance is not None, \
5487 "Cannot retrieve locked instance %s" % self.op.instance_name
5488 _CheckNodeOnline(self, self.instance.primary_node)
5490 def Exec(self, feedback_fn):
5491 """Activate the disks.
5494 disks_ok, disks_info = \
5495 _AssembleInstanceDisks(self, self.instance,
5496 ignore_size=self.op.ignore_size)
5498 raise errors.OpExecError("Cannot activate block devices")
5503 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5505 """Prepare the block devices for an instance.
5507 This sets up the block devices on all nodes.
5509 @type lu: L{LogicalUnit}
5510 @param lu: the logical unit on whose behalf we execute
5511 @type instance: L{objects.Instance}
5512 @param instance: the instance for whose disks we assemble
5513 @type disks: list of L{objects.Disk} or None
5514 @param disks: which disks to assemble (or all, if None)
5515 @type ignore_secondaries: boolean
5516 @param ignore_secondaries: if true, errors on secondary nodes
5517 won't result in an error return from the function
5518 @type ignore_size: boolean
5519 @param ignore_size: if true, the current known size of the disk
5520 will not be used during the disk activation, useful for cases
5521 when the size is wrong
5522 @return: a tuple of (disks_ok, device_info); device_info is a list of
5523 (host, instance_visible_name, node_visible_name) tuples
5524 with the mapping from node devices to instance devices
5529 iname = instance.name
5530 disks = _ExpandCheckDisks(instance, disks)
5532 # With the two-pass mechanism we try to reduce the window of
5533 # opportunity for the race condition of switching DRBD to primary
5534 # before the handshake has occurred, but we do not eliminate it
5536 # The proper fix would be to wait (with some limits) until the
5537 # connection has been made and drbd transitions from WFConnection
5538 # into any other network-connected state (Connected, SyncTarget,
5541 # 1st pass, assemble on all nodes in secondary mode
5542 for idx, inst_disk in enumerate(disks):
5543 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5545 node_disk = node_disk.Copy()
5546 node_disk.UnsetSize()
5547 lu.cfg.SetDiskID(node_disk, node)
5548 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5549 msg = result.fail_msg
5551 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5552 " (is_primary=False, pass=1): %s",
5553 inst_disk.iv_name, node, msg)
5554 if not ignore_secondaries:
5557 # FIXME: race condition on drbd migration to primary
5559 # 2nd pass, do only the primary node
5560 for idx, inst_disk in enumerate(disks):
5563 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5564 if node != instance.primary_node:
5567 node_disk = node_disk.Copy()
5568 node_disk.UnsetSize()
5569 lu.cfg.SetDiskID(node_disk, node)
5570 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5571 msg = result.fail_msg
5573 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5574 " (is_primary=True, pass=2): %s",
5575 inst_disk.iv_name, node, msg)
5578 dev_path = result.payload
5580 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5582 # leave the disks configured for the primary node
5583 # this is a workaround that would be fixed better by
5584 # improving the logical/physical id handling
5586 lu.cfg.SetDiskID(disk, instance.primary_node)
5588 return disks_ok, device_info
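# Illustrative caller pattern (a sketch; node and device names are made up):
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   # disks_info entries look like ("node1.example.com", "disk/0", "/dev/drbd0")
# as also used by LUInstanceActivateDisks.Exec above.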
5591 def _StartInstanceDisks(lu, instance, force):
5592 """Start the disks of an instance.
5595 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5596 ignore_secondaries=force)
5598 _ShutdownInstanceDisks(lu, instance)
5599 if force is not None and not force:
5600 lu.proc.LogWarning("", hint="If the message above refers to a"
5602 " you can retry the operation using '--force'.")
5603 raise errors.OpExecError("Disk consistency error")
5606 class LUInstanceDeactivateDisks(NoHooksLU):
5607 """Shutdown an instance's disks.
5612 def ExpandNames(self):
5613 self._ExpandAndLockInstance()
5614 self.needed_locks[locking.LEVEL_NODE] = []
5615 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5617 def DeclareLocks(self, level):
5618 if level == locking.LEVEL_NODE:
5619 self._LockInstancesNodes()
5621 def CheckPrereq(self):
5622 """Check prerequisites.
5624 This checks that the instance is in the cluster.
5627 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5628 assert self.instance is not None, \
5629 "Cannot retrieve locked instance %s" % self.op.instance_name
5631 def Exec(self, feedback_fn):
5632 """Deactivate the disks
5635 instance = self.instance
5637 _ShutdownInstanceDisks(self, instance)
5639 _SafeShutdownInstanceDisks(self, instance)
5642 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5643 """Shutdown block devices of an instance.
5645 This function checks if an instance is running, before calling
5646 _ShutdownInstanceDisks.
5649 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5650 _ShutdownInstanceDisks(lu, instance, disks=disks)
5653 def _ExpandCheckDisks(instance, disks):
5654 """Return the instance disks selected by the disks list
5656 @type disks: list of L{objects.Disk} or None
5657 @param disks: selected disks
5658 @rtype: list of L{objects.Disk}
5659 @return: selected instance disks to act on
5663 return instance.disks
5665 if not set(disks).issubset(instance.disks):
5666 raise errors.ProgrammerError("Can only act on disks belonging to the"
5671 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5672 """Shutdown block devices of an instance.
5674 This does the shutdown on all nodes of the instance.
5676  If ignore_primary is false, errors on the primary node are
5681 disks = _ExpandCheckDisks(instance, disks)
5684 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5685 lu.cfg.SetDiskID(top_disk, node)
5686 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5687 msg = result.fail_msg
5689 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5690 disk.iv_name, node, msg)
5691 if ((node == instance.primary_node and not ignore_primary) or
5692 (node != instance.primary_node and not result.offline)):
5697 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5698 """Checks if a node has enough free memory.
5700  This function checks if a given node has the needed amount of free
5701  memory. In case the node has less memory or we cannot get the
5702  information from the node, this function raises an OpPrereqError
5705 @type lu: C{LogicalUnit}
5706 @param lu: a logical unit from which we get configuration data
5708 @param node: the node to check
5709 @type reason: C{str}
5710 @param reason: string to use in the error message
5711 @type requested: C{int}
5712 @param requested: the amount of memory in MiB to check for
5713 @type hypervisor_name: C{str}
5714 @param hypervisor_name: the hypervisor to ask for memory stats
5715 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5716 we cannot check the node
5719 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5720 nodeinfo[node].Raise("Can't get data from node %s" % node,
5721 prereq=True, ecode=errors.ECODE_ENVIRON)
5722 free_mem = nodeinfo[node].payload.get("memory_free", None)
5723 if not isinstance(free_mem, int):
5724 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5725 " was '%s'" % (node, free_mem),
5726 errors.ECODE_ENVIRON)
5727 if requested > free_mem:
5728 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5729 " needed %s MiB, available %s MiB" %
5730 (node, reason, requested, free_mem),
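# Illustrative sketch only (mirrors the check LUInstanceStartup performs
# further below): verify that an instance's configured memory fits on its
# primary node before it is started; the "bep" lookup follows the FillBE
# pattern used elsewhere in this module.
def _ExampleCheckStartupMemory(lu, instance):
  """Sketch: free-memory check prior to starting an instance."""
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)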
5734 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5735 """Checks if nodes have enough free disk space in the all VGs.
5737  This function checks if all given nodes have the needed amount of
5738  free disk. In case any node has less disk or we cannot get the
5739  information from the node, this function raises an OpPrereqError
5742 @type lu: C{LogicalUnit}
5743 @param lu: a logical unit from which we get configuration data
5744 @type nodenames: C{list}
5745 @param nodenames: the list of node names to check
5746 @type req_sizes: C{dict}
5747 @param req_sizes: the hash of vg and corresponding amount of disk in
5749 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5750 or we cannot check the node
5753 for vg, req_size in req_sizes.items():
5754 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
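# Illustrative sketch only: the req_sizes argument above maps a VG name to
# the amount of space (in MiB) needed from it; the VG names and sizes below
# are hypothetical.
def _ExampleCheckTwoVGs(lu, nodenames):
  """Sketch: verify space for data and DRBD metadata in separate VGs."""
  req_sizes = {
    "xenvg": 10240,  # 10 GiB of instance data
    "metavg": 256,   # room for the DRBD metadata volumes
    }
  _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes)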
5757 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5758 """Checks if nodes have enough free disk space in the specified VG.
5760  This function checks if all given nodes have the needed amount of
5761  free disk. In case any node has less disk or we cannot get the
5762  information from the node, this function raises an OpPrereqError
5765 @type lu: C{LogicalUnit}
5766 @param lu: a logical unit from which we get configuration data
5767 @type nodenames: C{list}
5768 @param nodenames: the list of node names to check
5770 @param vg: the volume group to check
5771 @type requested: C{int}
5772 @param requested: the amount of disk in MiB to check for
5773 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5774 or we cannot check the node
5777 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5778 for node in nodenames:
5779 info = nodeinfo[node]
5780 info.Raise("Cannot get current information from node %s" % node,
5781 prereq=True, ecode=errors.ECODE_ENVIRON)
5782 vg_free = info.payload.get("vg_free", None)
5783 if not isinstance(vg_free, int):
5784 raise errors.OpPrereqError("Can't compute free disk space on node"
5785 " %s for vg %s, result was '%s'" %
5786 (node, vg, vg_free), errors.ECODE_ENVIRON)
5787 if requested > vg_free:
5788 raise errors.OpPrereqError("Not enough disk space on target node %s"
5789 " vg %s: required %d MiB, available %d MiB" %
5790 (node, vg, requested, vg_free),
5794 class LUInstanceStartup(LogicalUnit):
5795 """Starts an instance.
5798 HPATH = "instance-start"
5799 HTYPE = constants.HTYPE_INSTANCE
5802 def CheckArguments(self):
5804 if self.op.beparams:
5805 # fill the beparams dict
5806 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5808 def ExpandNames(self):
5809 self._ExpandAndLockInstance()
5811 def BuildHooksEnv(self):
5814 This runs on master, primary and secondary nodes of the instance.
5818 "FORCE": self.op.force,
5821 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5825 def BuildHooksNodes(self):
5826 """Build hooks nodes.
5829 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5832 def CheckPrereq(self):
5833 """Check prerequisites.
5835 This checks that the instance is in the cluster.
5838 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5839 assert self.instance is not None, \
5840 "Cannot retrieve locked instance %s" % self.op.instance_name
5843 if self.op.hvparams:
5844 # check hypervisor parameter syntax (locally)
5845 cluster = self.cfg.GetClusterInfo()
5846 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5847 filled_hvp = cluster.FillHV(instance)
5848 filled_hvp.update(self.op.hvparams)
5849 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5850 hv_type.CheckParameterSyntax(filled_hvp)
5851 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5853 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5855 if self.primary_offline and self.op.ignore_offline_nodes:
5856 self.proc.LogWarning("Ignoring offline primary node")
5858 if self.op.hvparams or self.op.beparams:
5859 self.proc.LogWarning("Overridden parameters are ignored")
5861 _CheckNodeOnline(self, instance.primary_node)
5863 bep = self.cfg.GetClusterInfo().FillBE(instance)
5865 # check bridges existence
5866 _CheckInstanceBridgesExist(self, instance)
5868 remote_info = self.rpc.call_instance_info(instance.primary_node,
5870 instance.hypervisor)
5871 remote_info.Raise("Error checking node %s" % instance.primary_node,
5872 prereq=True, ecode=errors.ECODE_ENVIRON)
5873 if not remote_info.payload: # not running already
5874 _CheckNodeFreeMemory(self, instance.primary_node,
5875 "starting instance %s" % instance.name,
5876 bep[constants.BE_MEMORY], instance.hypervisor)
5878 def Exec(self, feedback_fn):
5879 """Start the instance.
5882 instance = self.instance
5883 force = self.op.force
5885 if not self.op.no_remember:
5886 self.cfg.MarkInstanceUp(instance.name)
5888 if self.primary_offline:
5889 assert self.op.ignore_offline_nodes
5890 self.proc.LogInfo("Primary node offline, marked instance as started")
5892 node_current = instance.primary_node
5894 _StartInstanceDisks(self, instance, force)
5896 result = self.rpc.call_instance_start(node_current, instance,
5897 self.op.hvparams, self.op.beparams,
5898 self.op.startup_paused)
5899 msg = result.fail_msg
5901 _ShutdownInstanceDisks(self, instance)
5902 raise errors.OpExecError("Could not start instance: %s" % msg)
5905 class LUInstanceReboot(LogicalUnit):
5906 """Reboot an instance.
5909 HPATH = "instance-reboot"
5910 HTYPE = constants.HTYPE_INSTANCE
5913 def ExpandNames(self):
5914 self._ExpandAndLockInstance()
5916 def BuildHooksEnv(self):
5919 This runs on master, primary and secondary nodes of the instance.
5923 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5924 "REBOOT_TYPE": self.op.reboot_type,
5925 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5928 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5932 def BuildHooksNodes(self):
5933 """Build hooks nodes.
5936 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5939 def CheckPrereq(self):
5940 """Check prerequisites.
5942 This checks that the instance is in the cluster.
5945 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5946 assert self.instance is not None, \
5947 "Cannot retrieve locked instance %s" % self.op.instance_name
5949 _CheckNodeOnline(self, instance.primary_node)
5951 # check bridges existence
5952 _CheckInstanceBridgesExist(self, instance)
5954 def Exec(self, feedback_fn):
5955 """Reboot the instance.
5958 instance = self.instance
5959 ignore_secondaries = self.op.ignore_secondaries
5960 reboot_type = self.op.reboot_type
5962 remote_info = self.rpc.call_instance_info(instance.primary_node,
5964 instance.hypervisor)
5965 remote_info.Raise("Error checking node %s" % instance.primary_node)
5966 instance_running = bool(remote_info.payload)
5968 node_current = instance.primary_node
5970 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5971 constants.INSTANCE_REBOOT_HARD]:
5972 for disk in instance.disks:
5973 self.cfg.SetDiskID(disk, node_current)
5974 result = self.rpc.call_instance_reboot(node_current, instance,
5976 self.op.shutdown_timeout)
5977 result.Raise("Could not reboot instance")
5979 if instance_running:
5980 result = self.rpc.call_instance_shutdown(node_current, instance,
5981 self.op.shutdown_timeout)
5982 result.Raise("Could not shutdown instance for full reboot")
5983 _ShutdownInstanceDisks(self, instance)
5985 self.LogInfo("Instance %s was already stopped, starting now",
5987 _StartInstanceDisks(self, instance, ignore_secondaries)
5988 result = self.rpc.call_instance_start(node_current, instance,
5990 msg = result.fail_msg
5992 _ShutdownInstanceDisks(self, instance)
5993 raise errors.OpExecError("Could not start instance for"
5994 " full reboot: %s" % msg)
5996 self.cfg.MarkInstanceUp(instance.name)
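# Illustrative sketch only: the decision taken in LUInstanceReboot.Exec
# above, extracted as a pure predicate; soft and hard reboots are delegated
# to the hypervisor, while anything else (or a stopped instance) is handled
# as a full stop/start cycle.
def _ExampleRebootNeedsStopStart(instance_running, reboot_type):
  """Sketch: True when the reboot must be emulated by stop + start."""
  return not (instance_running and
              reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                              constants.INSTANCE_REBOOT_HARD])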
5999 class LUInstanceShutdown(LogicalUnit):
6000 """Shutdown an instance.
6003 HPATH = "instance-stop"
6004 HTYPE = constants.HTYPE_INSTANCE
6007 def ExpandNames(self):
6008 self._ExpandAndLockInstance()
6010 def BuildHooksEnv(self):
6013 This runs on master, primary and secondary nodes of the instance.
6016 env = _BuildInstanceHookEnvByObject(self, self.instance)
6017 env["TIMEOUT"] = self.op.timeout
6020 def BuildHooksNodes(self):
6021 """Build hooks nodes.
6024 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6027 def CheckPrereq(self):
6028 """Check prerequisites.
6030 This checks that the instance is in the cluster.
6033 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6034 assert self.instance is not None, \
6035 "Cannot retrieve locked instance %s" % self.op.instance_name
6037 self.primary_offline = \
6038 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6040 if self.primary_offline and self.op.ignore_offline_nodes:
6041 self.proc.LogWarning("Ignoring offline primary node")
6043 _CheckNodeOnline(self, self.instance.primary_node)
6045 def Exec(self, feedback_fn):
6046 """Shutdown the instance.
6049 instance = self.instance
6050 node_current = instance.primary_node
6051 timeout = self.op.timeout
6053 if not self.op.no_remember:
6054 self.cfg.MarkInstanceDown(instance.name)
6056 if self.primary_offline:
6057 assert self.op.ignore_offline_nodes
6058 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6060 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6061 msg = result.fail_msg
6063 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6065 _ShutdownInstanceDisks(self, instance)
6068 class LUInstanceReinstall(LogicalUnit):
6069 """Reinstall an instance.
6072 HPATH = "instance-reinstall"
6073 HTYPE = constants.HTYPE_INSTANCE
6076 def ExpandNames(self):
6077 self._ExpandAndLockInstance()
6079 def BuildHooksEnv(self):
6082 This runs on master, primary and secondary nodes of the instance.
6085 return _BuildInstanceHookEnvByObject(self, self.instance)
6087 def BuildHooksNodes(self):
6088 """Build hooks nodes.
6091 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6094 def CheckPrereq(self):
6095 """Check prerequisites.
6097 This checks that the instance is in the cluster and is not running.
6100 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6101 assert instance is not None, \
6102 "Cannot retrieve locked instance %s" % self.op.instance_name
6103 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6104 " offline, cannot reinstall")
6105 for node in instance.secondary_nodes:
6106 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6107 " cannot reinstall")
6109 if instance.disk_template == constants.DT_DISKLESS:
6110 raise errors.OpPrereqError("Instance '%s' has no disks" %
6111 self.op.instance_name,
6113 _CheckInstanceDown(self, instance, "cannot reinstall")
6115 if self.op.os_type is not None:
6117 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6118 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6119 instance_os = self.op.os_type
6121 instance_os = instance.os
6123 nodelist = list(instance.all_nodes)
6125 if self.op.osparams:
6126 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6127 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6128 self.os_inst = i_osdict # the new dict (without defaults)
6132 self.instance = instance
6134 def Exec(self, feedback_fn):
6135 """Reinstall the instance.
6138 inst = self.instance
6140 if self.op.os_type is not None:
6141 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6142 inst.os = self.op.os_type
6143 # Write to configuration
6144 self.cfg.Update(inst, feedback_fn)
6146 _StartInstanceDisks(self, inst, None)
6148 feedback_fn("Running the instance OS create scripts...")
6149 # FIXME: pass debug option from opcode to backend
6150 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6151 self.op.debug_level,
6152 osparams=self.os_inst)
6153 result.Raise("Could not install OS for instance %s on node %s" %
6154 (inst.name, inst.primary_node))
6156 _ShutdownInstanceDisks(self, inst)
6159 class LUInstanceRecreateDisks(LogicalUnit):
6160 """Recreate an instance's missing disks.
6163 HPATH = "instance-recreate-disks"
6164 HTYPE = constants.HTYPE_INSTANCE
6167 def CheckArguments(self):
6168 # normalise the disk list
6169 self.op.disks = sorted(frozenset(self.op.disks))
6171 def ExpandNames(self):
6172 self._ExpandAndLockInstance()
6173 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6175 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6176 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6178 self.needed_locks[locking.LEVEL_NODE] = []
6180 def DeclareLocks(self, level):
6181 if level == locking.LEVEL_NODE:
6182 # if we replace the nodes, we only need to lock the old primary,
6183 # otherwise we need to lock all nodes for disk re-creation
6184 primary_only = bool(self.op.nodes)
6185 self._LockInstancesNodes(primary_only=primary_only)
6187 def BuildHooksEnv(self):
6190 This runs on master, primary and secondary nodes of the instance.
6193 return _BuildInstanceHookEnvByObject(self, self.instance)
6195 def BuildHooksNodes(self):
6196 """Build hooks nodes.
6199 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6202 def CheckPrereq(self):
6203 """Check prerequisites.
6205 This checks that the instance is in the cluster and is not running.
6208 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6209 assert instance is not None, \
6210 "Cannot retrieve locked instance %s" % self.op.instance_name
6212 if len(self.op.nodes) != len(instance.all_nodes):
6213 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6214 " %d replacement nodes were specified" %
6215 (instance.name, len(instance.all_nodes),
6216 len(self.op.nodes)),
6218 assert instance.disk_template != constants.DT_DRBD8 or \
6219 len(self.op.nodes) == 2
6220 assert instance.disk_template != constants.DT_PLAIN or \
6221 len(self.op.nodes) == 1
6222 primary_node = self.op.nodes[0]
6224 primary_node = instance.primary_node
6225 _CheckNodeOnline(self, primary_node)
6227 if instance.disk_template == constants.DT_DISKLESS:
6228 raise errors.OpPrereqError("Instance '%s' has no disks" %
6229 self.op.instance_name, errors.ECODE_INVAL)
6230 # if we replace nodes *and* the old primary is offline, we don't
6232 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6233 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6234 if not (self.op.nodes and old_pnode.offline):
6235 _CheckInstanceDown(self, instance, "cannot recreate disks")
6237 if not self.op.disks:
6238 self.op.disks = range(len(instance.disks))
6240 for idx in self.op.disks:
6241 if idx >= len(instance.disks):
6242 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6244 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6245 raise errors.OpPrereqError("Can't recreate disks partially and"
6246 " change the nodes at the same time",
6248 self.instance = instance
6250 def Exec(self, feedback_fn):
6251 """Recreate the disks.
6254 instance = self.instance
6257 mods = [] # keeps track of needed logical_id changes
6259 for idx, disk in enumerate(instance.disks):
6260 if idx not in self.op.disks: # disk idx has not been passed in
6263 # update secondaries for disks, if needed
6265 if disk.dev_type == constants.LD_DRBD8:
6266 # need to update the nodes and minors
6267 assert len(self.op.nodes) == 2
6268 assert len(disk.logical_id) == 6 # otherwise disk internals
6270 (_, _, old_port, _, _, old_secret) = disk.logical_id
6271 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6272 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6273 new_minors[0], new_minors[1], old_secret)
6274 assert len(disk.logical_id) == len(new_id)
6275 mods.append((idx, new_id))
6277 # now that we have passed all asserts above, we can apply the mods
6278 # in a single run (to avoid partial changes)
6279 for idx, new_id in mods:
6280 instance.disks[idx].logical_id = new_id
6282 # change primary node, if needed
6284 instance.primary_node = self.op.nodes[0]
6285 self.LogWarning("Changing the instance's nodes, you will have to"
6286 " remove any disks left on the older nodes manually")
6289 self.cfg.Update(instance, feedback_fn)
6291 _CreateDisks(self, instance, to_skip=to_skip)
6294 class LUInstanceRename(LogicalUnit):
6295 """Rename an instance.
6298 HPATH = "instance-rename"
6299 HTYPE = constants.HTYPE_INSTANCE
6301 def CheckArguments(self):
6305 if self.op.ip_check and not self.op.name_check:
6306 # TODO: make the ip check more flexible and not depend on the name check
6307 raise errors.OpPrereqError("IP address check requires a name check",
6310 def BuildHooksEnv(self):
6313 This runs on master, primary and secondary nodes of the instance.
6316 env = _BuildInstanceHookEnvByObject(self, self.instance)
6317 env["INSTANCE_NEW_NAME"] = self.op.new_name
6320 def BuildHooksNodes(self):
6321 """Build hooks nodes.
6324 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6327 def CheckPrereq(self):
6328 """Check prerequisites.
6330 This checks that the instance is in the cluster and is not running.
6333 self.op.instance_name = _ExpandInstanceName(self.cfg,
6334 self.op.instance_name)
6335 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6336 assert instance is not None
6337 _CheckNodeOnline(self, instance.primary_node)
6338 _CheckInstanceDown(self, instance, "cannot rename")
6339 self.instance = instance
6341 new_name = self.op.new_name
6342 if self.op.name_check:
6343 hostname = netutils.GetHostname(name=new_name)
6344 if hostname != new_name:
6345 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6347 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6348 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6349 " same as given hostname '%s'") %
6350 (hostname.name, self.op.new_name),
6352 new_name = self.op.new_name = hostname.name
6353 if (self.op.ip_check and
6354 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6355 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6356 (hostname.ip, new_name),
6357 errors.ECODE_NOTUNIQUE)
6359 instance_list = self.cfg.GetInstanceList()
6360 if new_name in instance_list and new_name != instance.name:
6361 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6362 new_name, errors.ECODE_EXISTS)
6364 def Exec(self, feedback_fn):
6365 """Rename the instance.
6368 inst = self.instance
6369 old_name = inst.name
6371 rename_file_storage = False
6372 if (inst.disk_template in constants.DTS_FILEBASED and
6373 self.op.new_name != inst.name):
6374 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6375 rename_file_storage = True
6377 self.cfg.RenameInstance(inst.name, self.op.new_name)
6378 # Change the instance lock. This is definitely safe while we hold the BGL.
6379 # Otherwise the new lock would have to be added in acquired mode.
6381 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6382 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6384 # re-read the instance from the configuration after rename
6385 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6387 if rename_file_storage:
6388 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6389 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6390 old_file_storage_dir,
6391 new_file_storage_dir)
6392 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6393 " (but the instance has been renamed in Ganeti)" %
6394 (inst.primary_node, old_file_storage_dir,
6395 new_file_storage_dir))
6397 _StartInstanceDisks(self, inst, None)
6399 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6400 old_name, self.op.debug_level)
6401 msg = result.fail_msg
6403 msg = ("Could not run OS rename script for instance %s on node %s"
6404 " (but the instance has been renamed in Ganeti): %s" %
6405 (inst.name, inst.primary_node, msg))
6406 self.proc.LogWarning(msg)
6408 _ShutdownInstanceDisks(self, inst)
6413 class LUInstanceRemove(LogicalUnit):
6414 """Remove an instance.
6417 HPATH = "instance-remove"
6418 HTYPE = constants.HTYPE_INSTANCE
6421 def ExpandNames(self):
6422 self._ExpandAndLockInstance()
6423 self.needed_locks[locking.LEVEL_NODE] = []
6424 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6426 def DeclareLocks(self, level):
6427 if level == locking.LEVEL_NODE:
6428 self._LockInstancesNodes()
6430 def BuildHooksEnv(self):
6433 This runs on master, primary and secondary nodes of the instance.
6436 env = _BuildInstanceHookEnvByObject(self, self.instance)
6437 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6440 def BuildHooksNodes(self):
6441 """Build hooks nodes.
6444 nl = [self.cfg.GetMasterNode()]
6445 nl_post = list(self.instance.all_nodes) + nl
6446 return (nl, nl_post)
6448 def CheckPrereq(self):
6449 """Check prerequisites.
6451 This checks that the instance is in the cluster.
6454 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6455 assert self.instance is not None, \
6456 "Cannot retrieve locked instance %s" % self.op.instance_name
6458 def Exec(self, feedback_fn):
6459 """Remove the instance.
6462 instance = self.instance
6463 logging.info("Shutting down instance %s on node %s",
6464 instance.name, instance.primary_node)
6466 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6467 self.op.shutdown_timeout)
6468 msg = result.fail_msg
6470 if self.op.ignore_failures:
6471 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6473 raise errors.OpExecError("Could not shutdown instance %s on"
6475 (instance.name, instance.primary_node, msg))
6477 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6480 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6481 """Utility function to remove an instance.
6484 logging.info("Removing block devices for instance %s", instance.name)
6486 if not _RemoveDisks(lu, instance):
6487 if not ignore_failures:
6488 raise errors.OpExecError("Can't remove instance's disks")
6489 feedback_fn("Warning: can't remove instance's disks")
6491 logging.info("Removing instance %s out of cluster config", instance.name)
6493 lu.cfg.RemoveInstance(instance.name)
6495 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6496 "Instance lock removal conflict"
6498 # Remove lock for the instance
6499 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6502 class LUInstanceQuery(NoHooksLU):
6503 """Logical unit for querying instances.
6506 # pylint: disable-msg=W0142
6509 def CheckArguments(self):
6510 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6511 self.op.output_fields, self.op.use_locking)
6513 def ExpandNames(self):
6514 self.iq.ExpandNames(self)
6516 def DeclareLocks(self, level):
6517 self.iq.DeclareLocks(self, level)
6519 def Exec(self, feedback_fn):
6520 return self.iq.OldStyleQuery(self)
6523 class LUInstanceFailover(LogicalUnit):
6524 """Failover an instance.
6527 HPATH = "instance-failover"
6528 HTYPE = constants.HTYPE_INSTANCE
6531 def CheckArguments(self):
6532 """Check the arguments.
6535 self.iallocator = getattr(self.op, "iallocator", None)
6536 self.target_node = getattr(self.op, "target_node", None)
6538 def ExpandNames(self):
6539 self._ExpandAndLockInstance()
6541 if self.op.target_node is not None:
6542 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6544 self.needed_locks[locking.LEVEL_NODE] = []
6545 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6547 ignore_consistency = self.op.ignore_consistency
6548 shutdown_timeout = self.op.shutdown_timeout
6549 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6552 ignore_consistency=ignore_consistency,
6553 shutdown_timeout=shutdown_timeout)
6554 self.tasklets = [self._migrater]
6556 def DeclareLocks(self, level):
6557 if level == locking.LEVEL_NODE:
6558 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6559 if instance.disk_template in constants.DTS_EXT_MIRROR:
6560 if self.op.target_node is None:
6561 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6563 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6564 self.op.target_node]
6565 del self.recalculate_locks[locking.LEVEL_NODE]
6567 self._LockInstancesNodes()
6569 def BuildHooksEnv(self):
6572 This runs on master, primary and secondary nodes of the instance.
6575 instance = self._migrater.instance
6576 source_node = instance.primary_node
6577 target_node = self.op.target_node
6579 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6580 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6581 "OLD_PRIMARY": source_node,
6582 "NEW_PRIMARY": target_node,
6585 if instance.disk_template in constants.DTS_INT_MIRROR:
6586 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6587 env["NEW_SECONDARY"] = source_node
6589 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6591 env.update(_BuildInstanceHookEnvByObject(self, instance))
6595 def BuildHooksNodes(self):
6596 """Build hooks nodes.
6599 instance = self._migrater.instance
6600 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6601 return (nl, nl + [instance.primary_node])
6604 class LUInstanceMigrate(LogicalUnit):
6605 """Migrate an instance.
6607 This is migration without shutting down, compared to the failover,
6608 which is done with shutdown.
6611 HPATH = "instance-migrate"
6612 HTYPE = constants.HTYPE_INSTANCE
6615 def ExpandNames(self):
6616 self._ExpandAndLockInstance()
6618 if self.op.target_node is not None:
6619 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6621 self.needed_locks[locking.LEVEL_NODE] = []
6622 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6624 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6625 cleanup=self.op.cleanup,
6627 fallback=self.op.allow_failover)
6628 self.tasklets = [self._migrater]
6630 def DeclareLocks(self, level):
6631 if level == locking.LEVEL_NODE:
6632 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6633 if instance.disk_template in constants.DTS_EXT_MIRROR:
6634 if self.op.target_node is None:
6635 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6637 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6638 self.op.target_node]
6639 del self.recalculate_locks[locking.LEVEL_NODE]
6641 self._LockInstancesNodes()
6643 def BuildHooksEnv(self):
6646 This runs on master, primary and secondary nodes of the instance.
6649 instance = self._migrater.instance
6650 source_node = instance.primary_node
6651 target_node = self.op.target_node
6652 env = _BuildInstanceHookEnvByObject(self, instance)
6654 "MIGRATE_LIVE": self._migrater.live,
6655 "MIGRATE_CLEANUP": self.op.cleanup,
6656 "OLD_PRIMARY": source_node,
6657 "NEW_PRIMARY": target_node,
6660 if instance.disk_template in constants.DTS_INT_MIRROR:
6661 env["OLD_SECONDARY"] = target_node
6662 env["NEW_SECONDARY"] = source_node
6664 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6668 def BuildHooksNodes(self):
6669 """Build hooks nodes.
6672 instance = self._migrater.instance
6673 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6674 return (nl, nl + [instance.primary_node])
6677 class LUInstanceMove(LogicalUnit):
6678 """Move an instance by data-copying.
6681 HPATH = "instance-move"
6682 HTYPE = constants.HTYPE_INSTANCE
6685 def ExpandNames(self):
6686 self._ExpandAndLockInstance()
6687 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6688 self.op.target_node = target_node
6689 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6690 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6692 def DeclareLocks(self, level):
6693 if level == locking.LEVEL_NODE:
6694 self._LockInstancesNodes(primary_only=True)
6696 def BuildHooksEnv(self):
6699 This runs on master, primary and secondary nodes of the instance.
6703 "TARGET_NODE": self.op.target_node,
6704 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6706 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6709 def BuildHooksNodes(self):
6710 """Build hooks nodes.
6714 self.cfg.GetMasterNode(),
6715 self.instance.primary_node,
6716 self.op.target_node,
6720 def CheckPrereq(self):
6721 """Check prerequisites.
6723 This checks that the instance is in the cluster.
6726 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6727 assert self.instance is not None, \
6728 "Cannot retrieve locked instance %s" % self.op.instance_name
6730 node = self.cfg.GetNodeInfo(self.op.target_node)
6731 assert node is not None, \
6732 "Cannot retrieve locked node %s" % self.op.target_node
6734 self.target_node = target_node = node.name
6736 if target_node == instance.primary_node:
6737 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6738 (instance.name, target_node),
6741 bep = self.cfg.GetClusterInfo().FillBE(instance)
6743 for idx, dsk in enumerate(instance.disks):
6744 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6745 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6746 " cannot copy" % idx, errors.ECODE_STATE)
6748 _CheckNodeOnline(self, target_node)
6749 _CheckNodeNotDrained(self, target_node)
6750 _CheckNodeVmCapable(self, target_node)
6752 if instance.admin_up:
6753 # check memory requirements on the secondary node
6754 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6755 instance.name, bep[constants.BE_MEMORY],
6756 instance.hypervisor)
6758 self.LogInfo("Not checking memory on the secondary node as"
6759 " instance will not be started")
6761    # check bridge existence
6762 _CheckInstanceBridgesExist(self, instance, node=target_node)
6764 def Exec(self, feedback_fn):
6765 """Move an instance.
6767 The move is done by shutting it down on its present node, copying
6768 the data over (slow) and starting it on the new node.
6771 instance = self.instance
6773 source_node = instance.primary_node
6774 target_node = self.target_node
6776 self.LogInfo("Shutting down instance %s on source node %s",
6777 instance.name, source_node)
6779 result = self.rpc.call_instance_shutdown(source_node, instance,
6780 self.op.shutdown_timeout)
6781 msg = result.fail_msg
6783 if self.op.ignore_consistency:
6784 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6785 " Proceeding anyway. Please make sure node"
6786 " %s is down. Error details: %s",
6787 instance.name, source_node, source_node, msg)
6789 raise errors.OpExecError("Could not shutdown instance %s on"
6791 (instance.name, source_node, msg))
6793 # create the target disks
6795 _CreateDisks(self, instance, target_node=target_node)
6796 except errors.OpExecError:
6797 self.LogWarning("Device creation failed, reverting...")
6799 _RemoveDisks(self, instance, target_node=target_node)
6801 self.cfg.ReleaseDRBDMinors(instance.name)
6804 cluster_name = self.cfg.GetClusterInfo().cluster_name
6807 # activate, get path, copy the data over
6808 for idx, disk in enumerate(instance.disks):
6809 self.LogInfo("Copying data for disk %d", idx)
6810 result = self.rpc.call_blockdev_assemble(target_node, disk,
6811 instance.name, True, idx)
6813 self.LogWarning("Can't assemble newly created disk %d: %s",
6814 idx, result.fail_msg)
6815 errs.append(result.fail_msg)
6817 dev_path = result.payload
6818 result = self.rpc.call_blockdev_export(source_node, disk,
6819 target_node, dev_path,
6822 self.LogWarning("Can't copy data over for disk %d: %s",
6823 idx, result.fail_msg)
6824 errs.append(result.fail_msg)
6828 self.LogWarning("Some disks failed to copy, aborting")
6830 _RemoveDisks(self, instance, target_node=target_node)
6832 self.cfg.ReleaseDRBDMinors(instance.name)
6833 raise errors.OpExecError("Errors during disk copy: %s" %
6836 instance.primary_node = target_node
6837 self.cfg.Update(instance, feedback_fn)
6839 self.LogInfo("Removing the disks on the original node")
6840 _RemoveDisks(self, instance, target_node=source_node)
6842 # Only start the instance if it's marked as up
6843 if instance.admin_up:
6844 self.LogInfo("Starting instance %s on node %s",
6845 instance.name, target_node)
6847 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6848 ignore_secondaries=True)
6850 _ShutdownInstanceDisks(self, instance)
6851 raise errors.OpExecError("Can't activate the instance's disks")
6853 result = self.rpc.call_instance_start(target_node, instance,
6855 msg = result.fail_msg
6857 _ShutdownInstanceDisks(self, instance)
6858 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6859 (instance.name, target_node, msg))
6862 class LUNodeMigrate(LogicalUnit):
6863 """Migrate all instances from a node.
6866 HPATH = "node-migrate"
6867 HTYPE = constants.HTYPE_NODE
6870 def CheckArguments(self):
6873 def ExpandNames(self):
6874 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6876 self.share_locks = _ShareAll()
6877 self.needed_locks = {
6878 locking.LEVEL_NODE: [self.op.node_name],
6881 def BuildHooksEnv(self):
6884 This runs on the master, the primary and all the secondaries.
6888 "NODE_NAME": self.op.node_name,
6891 def BuildHooksNodes(self):
6892 """Build hooks nodes.
6895 nl = [self.cfg.GetMasterNode()]
6898 def CheckPrereq(self):
6901 def Exec(self, feedback_fn):
6902 # Prepare jobs for migration instances
6904 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6907 iallocator=self.op.iallocator,
6908 target_node=self.op.target_node)]
6909 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6912 # TODO: Run iallocator in this opcode and pass correct placement options to
6913 # OpInstanceMigrate. Since other jobs can modify the cluster between
6914 # running the iallocator and the actual migration, a good consistency model
6915 # will have to be found.
6917 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6918 frozenset([self.op.node_name]))
6920 return ResultWithJobs(jobs)
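# Illustrative sketch only (not used anywhere): the value returned above is
# a list of lists of opcodes, one inner list (i.e. one job) per primary
# instance of the node; the instance names below are hypothetical.
def _ExampleNodeMigrateJobs():
  """Sketch: one single-opcode job per instance, as in Exec above."""
  names = ["inst1.example.com", "inst2.example.com"]
  return ResultWithJobs([[opcodes.OpInstanceMigrate(instance_name=name)]
                         for name in names])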
6923 class TLMigrateInstance(Tasklet):
6924 """Tasklet class for instance migration.
6927 @ivar live: whether the migration will be done live or non-live;
6928      this variable is initialized only after CheckPrereq has run
6929 @type cleanup: boolean
6930  @ivar cleanup: Whether we clean up from a failed migration
6931 @type iallocator: string
6932 @ivar iallocator: The iallocator used to determine target_node
6933 @type target_node: string
6934 @ivar target_node: If given, the target_node to reallocate the instance to
6935 @type failover: boolean
6936 @ivar failover: Whether operation results in failover or migration
6937 @type fallback: boolean
6938 @ivar fallback: Whether fallback to failover is allowed if migration not
6940 @type ignore_consistency: boolean
6941  @ivar ignore_consistency: Whether we should ignore consistency between source
6943 @type shutdown_timeout: int
6944  @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
6947 def __init__(self, lu, instance_name, cleanup=False,
6948 failover=False, fallback=False,
6949 ignore_consistency=False,
6950 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6951 """Initializes this class.
6954 Tasklet.__init__(self, lu)
6957 self.instance_name = instance_name
6958 self.cleanup = cleanup
6959 self.live = False # will be overridden later
6960 self.failover = failover
6961 self.fallback = fallback
6962 self.ignore_consistency = ignore_consistency
6963 self.shutdown_timeout = shutdown_timeout
6965 def CheckPrereq(self):
6966 """Check prerequisites.
6968 This checks that the instance is in the cluster.
6971 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6972 instance = self.cfg.GetInstanceInfo(instance_name)
6973 assert instance is not None
6974 self.instance = instance
6976 if (not self.cleanup and not instance.admin_up and not self.failover and
6978 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6980 self.failover = True
6982 if instance.disk_template not in constants.DTS_MIRRORED:
6987 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6988 " %s" % (instance.disk_template, text),
6991 if instance.disk_template in constants.DTS_EXT_MIRROR:
6992 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6994 if self.lu.op.iallocator:
6995 self._RunAllocator()
6997        # We set self.target_node as it is required by
6999 self.target_node = self.lu.op.target_node
7001 # self.target_node is already populated, either directly or by the
7003 target_node = self.target_node
7004 if self.target_node == instance.primary_node:
7005 raise errors.OpPrereqError("Cannot migrate instance %s"
7006 " to its primary (%s)" %
7007 (instance.name, instance.primary_node))
7009 if len(self.lu.tasklets) == 1:
7010 # It is safe to release locks only when we're the only tasklet
7012 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7013 keep=[instance.primary_node, self.target_node])
7016 secondary_nodes = instance.secondary_nodes
7017 if not secondary_nodes:
7018 raise errors.ConfigurationError("No secondary node but using"
7019 " %s disk template" %
7020 instance.disk_template)
7021 target_node = secondary_nodes[0]
7022 if self.lu.op.iallocator or (self.lu.op.target_node and
7023 self.lu.op.target_node != target_node):
7025 text = "failed over"
7028 raise errors.OpPrereqError("Instances with disk template %s cannot"
7029 " be %s to arbitrary nodes"
7030 " (neither an iallocator nor a target"
7031 " node can be passed)" %
7032 (instance.disk_template, text),
7035 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7037 # check memory requirements on the secondary node
7038 if not self.failover or instance.admin_up:
7039 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7040 instance.name, i_be[constants.BE_MEMORY],
7041 instance.hypervisor)
7043 self.lu.LogInfo("Not checking memory on the secondary node as"
7044 " instance will not be started")
7046    # check bridge existence
7047 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7049 if not self.cleanup:
7050 _CheckNodeNotDrained(self.lu, target_node)
7051 if not self.failover:
7052 result = self.rpc.call_instance_migratable(instance.primary_node,
7054 if result.fail_msg and self.fallback:
7055 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7057 self.failover = True
7059 result.Raise("Can't migrate, please use failover",
7060 prereq=True, ecode=errors.ECODE_STATE)
7062 assert not (self.failover and self.cleanup)
7064 if not self.failover:
7065 if self.lu.op.live is not None and self.lu.op.mode is not None:
7066 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7067 " parameters are accepted",
7069 if self.lu.op.live is not None:
7071 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7073 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7074 # reset the 'live' parameter to None so that repeated
7075 # invocations of CheckPrereq do not raise an exception
7076 self.lu.op.live = None
7077 elif self.lu.op.mode is None:
7078 # read the default value from the hypervisor
7079 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7081 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7083 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7085 # Failover is never live
7088 def _RunAllocator(self):
7089 """Run the allocator based on input opcode.
7092 ial = IAllocator(self.cfg, self.rpc,
7093 mode=constants.IALLOCATOR_MODE_RELOC,
7094 name=self.instance_name,
7095 # TODO See why hail breaks with a single node below
7096 relocate_from=[self.instance.primary_node,
7097 self.instance.primary_node],
7100 ial.Run(self.lu.op.iallocator)
7103 raise errors.OpPrereqError("Can't compute nodes using"
7104 " iallocator '%s': %s" %
7105 (self.lu.op.iallocator, ial.info),
7107 if len(ial.result) != ial.required_nodes:
7108 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7109 " of nodes (%s), required %s" %
7110 (self.lu.op.iallocator, len(ial.result),
7111 ial.required_nodes), errors.ECODE_FAULT)
7112 self.target_node = ial.result[0]
7113 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7114 self.instance_name, self.lu.op.iallocator,
7115 utils.CommaJoin(ial.result))
7117 def _WaitUntilSync(self):
7118 """Poll with custom rpc for disk sync.
7120 This uses our own step-based rpc call.
7123 self.feedback_fn("* wait until resync is done")
7127 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7129 self.instance.disks)
7131 for node, nres in result.items():
7132 nres.Raise("Cannot resync disks on node %s" % node)
7133 node_done, node_percent = nres.payload
7134 all_done = all_done and node_done
7135 if node_percent is not None:
7136 min_percent = min(min_percent, node_percent)
7138 if min_percent < 100:
7139 self.feedback_fn(" - progress: %.1f%%" % min_percent)
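    # Illustrative sketch only: the aggregation in the polling loop above
    # reduces the per-node (done, sync_percent) payloads to a single
    # (all_done, min_percent) pair, e.g.:
    #
    #   all_done = True
    #   min_percent = 100
    #   for node_done, node_percent in payloads:
    #     all_done = all_done and node_done
    #     if node_percent is not None:
    #       min_percent = min(min_percent, node_percent)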
7142 def _EnsureSecondary(self, node):
7143 """Demote a node to secondary.
7146 self.feedback_fn("* switching node %s to secondary mode" % node)
7148 for dev in self.instance.disks:
7149 self.cfg.SetDiskID(dev, node)
7151 result = self.rpc.call_blockdev_close(node, self.instance.name,
7152 self.instance.disks)
7153 result.Raise("Cannot change disk to secondary on node %s" % node)
7155 def _GoStandalone(self):
7156 """Disconnect from the network.
7159 self.feedback_fn("* changing into standalone mode")
7160 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7161 self.instance.disks)
7162 for node, nres in result.items():
7163 nres.Raise("Cannot disconnect disks node %s" % node)
7165 def _GoReconnect(self, multimaster):
7166 """Reconnect to the network.
7172 msg = "single-master"
7173 self.feedback_fn("* changing disks into %s mode" % msg)
7174 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7175 self.instance.disks,
7176 self.instance.name, multimaster)
7177 for node, nres in result.items():
7178 nres.Raise("Cannot change disks config on node %s" % node)
7180 def _ExecCleanup(self):
7181 """Try to cleanup after a failed migration.
7183 The cleanup is done by:
7184 - check that the instance is running only on one node
7185 (and update the config if needed)
7186 - change disks on its secondary node to secondary
7187 - wait until disks are fully synchronized
7188 - disconnect from the network
7189 - change disks into single-master mode
7190 - wait again until disks are fully synchronized
7193 instance = self.instance
7194 target_node = self.target_node
7195 source_node = self.source_node
7197 # check running on only one node
7198 self.feedback_fn("* checking where the instance actually runs"
7199 " (if this hangs, the hypervisor might be in"
7201 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7202 for node, result in ins_l.items():
7203 result.Raise("Can't contact node %s" % node)
7205 runningon_source = instance.name in ins_l[source_node].payload
7206 runningon_target = instance.name in ins_l[target_node].payload
7208 if runningon_source and runningon_target:
7209 raise errors.OpExecError("Instance seems to be running on two nodes,"
7210 " or the hypervisor is confused; you will have"
7211 " to ensure manually that it runs only on one"
7212 " and restart this operation")
7214 if not (runningon_source or runningon_target):
7215 raise errors.OpExecError("Instance does not seem to be running at all;"
7216 " in this case it's safer to repair by"
7217 " running 'gnt-instance stop' to ensure disk"
7218 " shutdown, and then restarting it")
7220 if runningon_target:
7221 # the migration has actually succeeded, we need to update the config
7222 self.feedback_fn("* instance running on secondary node (%s),"
7223 " updating config" % target_node)
7224 instance.primary_node = target_node
7225 self.cfg.Update(instance, self.feedback_fn)
7226 demoted_node = source_node
7228 self.feedback_fn("* instance confirmed to be running on its"
7229 " primary node (%s)" % source_node)
7230 demoted_node = target_node
7232 if instance.disk_template in constants.DTS_INT_MIRROR:
7233 self._EnsureSecondary(demoted_node)
7235 self._WaitUntilSync()
7236 except errors.OpExecError:
7237        # we ignore errors here, since if the device is standalone, it
7238 # won't be able to sync
7240 self._GoStandalone()
7241 self._GoReconnect(False)
7242 self._WaitUntilSync()
7244 self.feedback_fn("* done")
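    # Illustrative sketch only: the checks at the top of _ExecCleanup above
    # reduce the two "is it running here?" answers to one of three outcomes,
    # roughly:
    #
    #   if runningon_source and runningon_target:
    #     raise errors.OpExecError("running on both nodes, refusing to guess")
    #   if not (runningon_source or runningon_target):
    #     raise errors.OpExecError("not running anywhere, repair manually")
    #   new_primary = target_node if runningon_target else source_node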
7246 def _RevertDiskStatus(self):
7247 """Try to revert the disk status after a failed migration.
7250 target_node = self.target_node
7251 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7255 self._EnsureSecondary(target_node)
7256 self._GoStandalone()
7257 self._GoReconnect(False)
7258 self._WaitUntilSync()
7259 except errors.OpExecError, err:
7260 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7261 " please try to recover the instance manually;"
7262 " error '%s'" % str(err))
7264 def _AbortMigration(self):
7265 """Call the hypervisor code to abort a started migration.
7268 instance = self.instance
7269 target_node = self.target_node
7270 migration_info = self.migration_info
7272 abort_result = self.rpc.call_finalize_migration(target_node,
7276 abort_msg = abort_result.fail_msg
7278 logging.error("Aborting migration failed on target node %s: %s",
7279 target_node, abort_msg)
7280    # Don't raise an exception here, as we still have to try to revert the
7281 # disk status, even if this step failed.
7283 def _ExecMigration(self):
7284 """Migrate an instance.
7286 The migrate is done by:
7287 - change the disks into dual-master mode
7288 - wait until disks are fully synchronized again
7289 - migrate the instance
7290 - change disks on the new secondary node (the old primary) to secondary
7291 - wait until disks are fully synchronized
7292 - change disks into single-master mode
7295 instance = self.instance
7296 target_node = self.target_node
7297 source_node = self.source_node
7299 self.feedback_fn("* checking disk consistency between source and target")
7300 for dev in instance.disks:
7301 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7302 raise errors.OpExecError("Disk %s is degraded or not fully"
7303 " synchronized on target node,"
7304 " aborting migration" % dev.iv_name)
7306 # First get the migration information from the remote node
7307 result = self.rpc.call_migration_info(source_node, instance)
7308 msg = result.fail_msg
7310 log_err = ("Failed fetching source migration information from %s: %s" %
7312 logging.error(log_err)
7313 raise errors.OpExecError(log_err)
7315 self.migration_info = migration_info = result.payload
7317 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7318 # Then switch the disks to master/master mode
7319 self._EnsureSecondary(target_node)
7320 self._GoStandalone()
7321 self._GoReconnect(True)
7322 self._WaitUntilSync()
7324 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7325 result = self.rpc.call_accept_instance(target_node,
7328 self.nodes_ip[target_node])
7330 msg = result.fail_msg
7332 logging.error("Instance pre-migration failed, trying to revert"
7333 " disk status: %s", msg)
7334 self.feedback_fn("Pre-migration failed, aborting")
7335 self._AbortMigration()
7336 self._RevertDiskStatus()
7337 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7338 (instance.name, msg))
7340 self.feedback_fn("* migrating instance to %s" % target_node)
7341 result = self.rpc.call_instance_migrate(source_node, instance,
7342 self.nodes_ip[target_node],
7344 msg = result.fail_msg
7346 logging.error("Instance migration failed, trying to revert"
7347 " disk status: %s", msg)
7348 self.feedback_fn("Migration failed, aborting")
7349 self._AbortMigration()
7350 self._RevertDiskStatus()
7351 raise errors.OpExecError("Could not migrate instance %s: %s" %
7352 (instance.name, msg))
7354 instance.primary_node = target_node
7355 # distribute new instance config to the other nodes
7356 self.cfg.Update(instance, self.feedback_fn)
7358 result = self.rpc.call_finalize_migration(target_node,
7362 msg = result.fail_msg
7364 logging.error("Instance migration succeeded, but finalization failed:"
7366 raise errors.OpExecError("Could not finalize instance migration: %s" %
7369 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7370 self._EnsureSecondary(source_node)
7371 self._WaitUntilSync()
7372 self._GoStandalone()
7373 self._GoReconnect(False)
7374 self._WaitUntilSync()
7376 self.feedback_fn("* done")
7378 def _ExecFailover(self):
7379 """Failover an instance.
7381 The failover is done by shutting it down on its present node and
7382 starting it on the secondary.
7385 instance = self.instance
7386 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7388 source_node = instance.primary_node
7389 target_node = self.target_node
7391 if instance.admin_up:
7392 self.feedback_fn("* checking disk consistency between source and target")
7393 for dev in instance.disks:
7394 # for drbd, these are drbd over lvm
7395 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7396 if primary_node.offline:
7397 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7399 (primary_node.name, dev.iv_name, target_node))
7400 elif not self.ignore_consistency:
7401 raise errors.OpExecError("Disk %s is degraded on target node,"
7402 " aborting failover" % dev.iv_name)
7404 self.feedback_fn("* not checking disk consistency as instance is not"
7407 self.feedback_fn("* shutting down instance on source node")
7408 logging.info("Shutting down instance %s on node %s",
7409 instance.name, source_node)
7411 result = self.rpc.call_instance_shutdown(source_node, instance,
7412 self.shutdown_timeout)
7413 msg = result.fail_msg
7415 if self.ignore_consistency or primary_node.offline:
7416 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7417 " proceeding anyway; please make sure node"
7418 " %s is down; error details: %s",
7419 instance.name, source_node, source_node, msg)
7421 raise errors.OpExecError("Could not shutdown instance %s on"
7423 (instance.name, source_node, msg))
7425 self.feedback_fn("* deactivating the instance's disks on source node")
7426 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7427 raise errors.OpExecError("Can't shut down the instance's disks")
7429 instance.primary_node = target_node
7430 # distribute new instance config to the other nodes
7431 self.cfg.Update(instance, self.feedback_fn)
7433 # Only start the instance if it's marked as up
7434 if instance.admin_up:
7435 self.feedback_fn("* activating the instance's disks on target node %s" %
7437 logging.info("Starting instance %s on node %s",
7438 instance.name, target_node)
7440 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7441 ignore_secondaries=True)
7443 _ShutdownInstanceDisks(self.lu, instance)
7444 raise errors.OpExecError("Can't activate the instance's disks")
7446 self.feedback_fn("* starting the instance on the target node %s" %
7448 result = self.rpc.call_instance_start(target_node, instance, None, None,
7450 msg = result.fail_msg
7452 _ShutdownInstanceDisks(self.lu, instance)
7453 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7454 (instance.name, target_node, msg))
7456 def Exec(self, feedback_fn):
7457 """Perform the migration.
7460 self.feedback_fn = feedback_fn
7461 self.source_node = self.instance.primary_node
7463 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7464 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7465 self.target_node = self.instance.secondary_nodes[0]
7466 # Otherwise self.target_node has been populated either
7467 # directly, or through an iallocator.
7469 self.all_nodes = [self.source_node, self.target_node]
7470 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7471 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7474 feedback_fn("Failover instance %s" % self.instance.name)
7475 self._ExecFailover()
7477 feedback_fn("Migrating instance %s" % self.instance.name)
7480 return self._ExecCleanup()
7482 return self._ExecMigration()
7485 def _CreateBlockDev(lu, node, instance, device, force_create,
7487 """Create a tree of block devices on a given node.
7489 If this device type has to be created on secondaries, create it and
7492 If not, just recurse to children keeping the same 'force' value.
7494 @param lu: the lu on whose behalf we execute
7495 @param node: the node on which to create the device
7496 @type instance: L{objects.Instance}
7497 @param instance: the instance which owns the device
7498 @type device: L{objects.Disk}
7499 @param device: the device to create
7500 @type force_create: boolean
7501 @param force_create: whether to force creation of this device; this
7502      will be changed to True whenever we find a device which has the
7503      CreateOnSecondary() attribute
7504 @param info: the extra 'metadata' we should attach to the device
7505 (this will be represented as a LVM tag)
7506 @type force_open: boolean
7507  @param force_open: this parameter will be passed to the
7508 L{backend.BlockdevCreate} function where it specifies
7509 whether we run on primary or not, and it affects both
7510      the child assembly and the device's own Open() execution
7513 if device.CreateOnSecondary():
7517 for child in device.children:
7518 _CreateBlockDev(lu, node, instance, child, force_create,
7521 if not force_create:
7524 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7527 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7528 """Create a single block device on a given node.
7530 This will not recurse over children of the device, so they must be
7533 @param lu: the lu on whose behalf we execute
7534 @param node: the node on which to create the device
7535 @type instance: L{objects.Instance}
7536 @param instance: the instance which owns the device
7537 @type device: L{objects.Disk}
7538 @param device: the device to create
7539 @param info: the extra 'metadata' we should attach to the device
7540 (this will be represented as a LVM tag)
7541 @type force_open: boolean
7542  @param force_open: this parameter will be passed to the
7543 L{backend.BlockdevCreate} function where it specifies
7544 whether we run on primary or not, and it affects both
7545 the child assembly and the device own Open() execution
7548 lu.cfg.SetDiskID(device, node)
7549 result = lu.rpc.call_blockdev_create(node, device, device.size,
7550 instance.name, force_open, info)
7551 result.Raise("Can't create block device %s on"
7552 " node %s for instance %s" % (device, node, instance.name))
7553 if device.physical_id is None:
7554 device.physical_id = result.payload
7557 def _GenerateUniqueNames(lu, exts):
7558 """Generate a suitable LV name.
7560 This will generate a logical volume name for the given instance.
7565 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7566 results.append("%s%s" % (new_id, val))
7570 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7571 iv_name, p_minor, s_minor):
7572 """Generate a drbd8 device complete with its children.
7575 assert len(vgnames) == len(names) == 2
7576 port = lu.cfg.AllocatePort()
7577 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7578 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7579 logical_id=(vgnames[0], names[0]))
7580 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7581 logical_id=(vgnames[1], names[1]))
7582 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7583 logical_id=(primary, secondary, port,
7586 children=[dev_data, dev_meta],
7591 def _GenerateDiskTemplate(lu, template_name,
7592 instance_name, primary_node,
7593 secondary_nodes, disk_info,
7594 file_storage_dir, file_driver,
7595 base_index, feedback_fn):
7596 """Generate the entire disk layout for a given template type.
7599 # TODO: compute space requirements
7601 vgname = lu.cfg.GetVGName()
7602 disk_count = len(disk_info)
7604 if template_name == constants.DT_DISKLESS:
7606 elif template_name == constants.DT_PLAIN:
7607 if len(secondary_nodes) != 0:
7608 raise errors.ProgrammerError("Wrong template configuration")
7610 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7611 for i in range(disk_count)])
7612 for idx, disk in enumerate(disk_info):
7613 disk_index = idx + base_index
7614 vg = disk.get(constants.IDISK_VG, vgname)
7615 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7616 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7617 size=disk[constants.IDISK_SIZE],
7618 logical_id=(vg, names[idx]),
7619 iv_name="disk/%d" % disk_index,
7620 mode=disk[constants.IDISK_MODE])
7621 disks.append(disk_dev)
7622 elif template_name == constants.DT_DRBD8:
7623 if len(secondary_nodes) != 1:
7624 raise errors.ProgrammerError("Wrong template configuration")
7625 remote_node = secondary_nodes[0]
7626 minors = lu.cfg.AllocateDRBDMinor(
7627 [primary_node, remote_node] * len(disk_info), instance_name)
7630 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7631 for i in range(disk_count)]):
7632 names.append(lv_prefix + "_data")
7633 names.append(lv_prefix + "_meta")
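# Illustration (the exact unique-id format is assumed): for two disks this
# allocates four DRBD minors, a (primary, secondary) pair per disk, and four
# LVs such as "<unique-id>.disk0_data"/"<unique-id>.disk0_meta"; each
# data/meta pair becomes the children of one DRBD8 device built by
# _GenerateDRBD8Branch above (the meta LV being 128 MB).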
7634 for idx, disk in enumerate(disk_info):
7635 disk_index = idx + base_index
7636 data_vg = disk.get(constants.IDISK_VG, vgname)
7637 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7638 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7639 disk[constants.IDISK_SIZE],
7641 names[idx * 2:idx * 2 + 2],
7642 "disk/%d" % disk_index,
7643 minors[idx * 2], minors[idx * 2 + 1])
7644 disk_dev.mode = disk[constants.IDISK_MODE]
7645 disks.append(disk_dev)
7646 elif template_name == constants.DT_FILE:
7647 if len(secondary_nodes) != 0:
7648 raise errors.ProgrammerError("Wrong template configuration")
7650 opcodes.RequireFileStorage()
7652 for idx, disk in enumerate(disk_info):
7653 disk_index = idx + base_index
7654 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7655 size=disk[constants.IDISK_SIZE],
7656 iv_name="disk/%d" % disk_index,
7657 logical_id=(file_driver,
7658 "%s/disk%d" % (file_storage_dir,
7660 mode=disk[constants.IDISK_MODE])
7661 disks.append(disk_dev)
7662 elif template_name == constants.DT_SHARED_FILE:
7663 if len(secondary_nodes) != 0:
7664 raise errors.ProgrammerError("Wrong template configuration")
7666 opcodes.RequireSharedFileStorage()
7668 for idx, disk in enumerate(disk_info):
7669 disk_index = idx + base_index
7670 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7671 size=disk[constants.IDISK_SIZE],
7672 iv_name="disk/%d" % disk_index,
7673 logical_id=(file_driver,
7674 "%s/disk%d" % (file_storage_dir,
7676 mode=disk[constants.IDISK_MODE])
7677 disks.append(disk_dev)
7678 elif template_name == constants.DT_BLOCK:
7679 if len(secondary_nodes) != 0:
7680 raise errors.ProgrammerError("Wrong template configuration")
7682 for idx, disk in enumerate(disk_info):
7683 disk_index = idx + base_index
7684 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7685 size=disk[constants.IDISK_SIZE],
7686 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7687 disk[constants.IDISK_ADOPT]),
7688 iv_name="disk/%d" % disk_index,
7689 mode=disk[constants.IDISK_MODE])
7690 disks.append(disk_dev)
7693 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7697 def _GetInstanceInfoText(instance):
7698 """Compute that text that should be added to the disk's metadata.
7701 return "originstname+%s" % instance.name
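# Example: for an instance named "inst1.example.com" the resulting LVM tag is
# "originstname+inst1.example.com".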
7704 def _CalcEta(time_taken, written, total_size):
7705 """Calculates the ETA based on size written and total size.
7707 @param time_taken: The time taken so far
7708 @param written: amount written so far
7709 @param total_size: The total size of data to be written
7710 @return: The remaining time in seconds
7713 avg_time = time_taken / float(written)
7714 return (total_size - written) * avg_time
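# Worked example: _CalcEta(30.0, 512, 2048) computes avg_time = 30.0 / 512
# (roughly 0.059 seconds per unit written), so the remaining (2048 - 512)
# units yield an ETA of (2048 - 512) * 30.0 / 512 = 90 seconds.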
7717 def _WipeDisks(lu, instance):
7718 """Wipes instance disks.
7720 @type lu: L{LogicalUnit}
7721 @param lu: the logical unit on whose behalf we execute
7722 @type instance: L{objects.Instance}
7723 @param instance: the instance whose disks we should wipe
7724 @return: the success of the wipe
7727 node = instance.primary_node
7729 for device in instance.disks:
7730 lu.cfg.SetDiskID(device, node)
7732 logging.info("Pause sync of instance %s disks", instance.name)
7733 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7735 for idx, success in enumerate(result.payload):
7737 logging.warn("pause-sync of instance %s for disk %d failed",
7741 for idx, device in enumerate(instance.disks):
7742 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
7743 # but at most MAX_WIPE_CHUNK
7744 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7745 constants.MIN_WIPE_CHUNK_PERCENT)
7746 # we _must_ make this an int, otherwise rounding errors will occur
7748 wipe_chunk_size = int(wipe_chunk_size)
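# Illustration (the constants' values are assumed here, not authoritative):
# with MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB, a 2048 MiB
# disk is wiped in chunks of min(1024, 2048 * 10 / 100.0) = 204.8 MiB,
# truncated to 204, while very large disks are capped at 1024 MiB per chunk.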
7750 lu.LogInfo("* Wiping disk %d", idx)
7751 logging.info("Wiping disk %d for instance %s, node %s using"
7752 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7757 start_time = time.time()
7759 while offset < size:
7760 wipe_size = min(wipe_chunk_size, size - offset)
7761 logging.debug("Wiping disk %d, offset %s, chunk %s",
7762 idx, offset, wipe_size)
7763 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7764 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7765 (idx, offset, wipe_size))
7768 if now - last_output >= 60:
7769 eta = _CalcEta(now - start_time, offset, size)
7770 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7771 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7774 logging.info("Resume sync of instance %s disks", instance.name)
7776 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7778 for idx, success in enumerate(result.payload):
7780 lu.LogWarning("Resume sync of disk %d failed, please have a"
7781 " look at the status and troubleshoot the issue", idx)
7782 logging.warn("resume-sync of instance %s for disk %d failed",
7786 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7787 """Create all disks for an instance.
7789 This abstracts away some work from AddInstance.
7791 @type lu: L{LogicalUnit}
7792 @param lu: the logical unit on whose behalf we execute
7793 @type instance: L{objects.Instance}
7794 @param instance: the instance whose disks we should create
7796 @param to_skip: list of indices to skip
7797 @type target_node: string
7798 @param target_node: if passed, overrides the target node for creation
7800 @return: the success of the creation
7803 info = _GetInstanceInfoText(instance)
7804 if target_node is None:
7805 pnode = instance.primary_node
7806 all_nodes = instance.all_nodes
7811 if instance.disk_template in constants.DTS_FILEBASED:
7812 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7813 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7815 result.Raise("Failed to create directory '%s' on"
7816 " node %s" % (file_storage_dir, pnode))
7818 # Note: this needs to be kept in sync with adding of disks in
7819 # LUInstanceSetParams
7820 for idx, device in enumerate(instance.disks):
7821 if to_skip and idx in to_skip:
7823 logging.info("Creating volume %s for instance %s",
7824 device.iv_name, instance.name)
7826 for node in all_nodes:
7827 f_create = node == pnode
7828 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
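# Note: f_create (and force_open) is True only on the primary node, so on
# secondary nodes only those devices get created that ask for it via
# CreateOnSecondary() (see _CreateBlockDev above).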
7831 def _RemoveDisks(lu, instance, target_node=None):
7832 """Remove all disks for an instance.
7834 This abstracts away some work from `AddInstance()` and
7835 `RemoveInstance()`. Note that in case some of the devices couldn't
7836 be removed, the removal will continue with the other ones (compare
7837 with `_CreateDisks()`).
7839 @type lu: L{LogicalUnit}
7840 @param lu: the logical unit on whose behalf we execute
7841 @type instance: L{objects.Instance}
7842 @param instance: the instance whose disks we should remove
7843 @type target_node: string
7844 @param target_node: used to override the node on which to remove the disks
7846 @return: the success of the removal
7849 logging.info("Removing block devices for instance %s", instance.name)
7852 for device in instance.disks:
7854 edata = [(target_node, device)]
7856 edata = device.ComputeNodeTree(instance.primary_node)
7857 for node, disk in edata:
7858 lu.cfg.SetDiskID(disk, node)
7859 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7861 lu.LogWarning("Could not remove block device %s on node %s,"
7862 " continuing anyway: %s", device.iv_name, node, msg)
7865 if instance.disk_template == constants.DT_FILE:
7866 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7870 tgt = instance.primary_node
7871 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7873 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7874 file_storage_dir, instance.primary_node, result.fail_msg)
7880 def _ComputeDiskSizePerVG(disk_template, disks):
7881 """Compute disk size requirements in the volume group
7884 def _compute(disks, payload):
7885 """Universal algorithm.
7890 vgs[disk[constants.IDISK_VG]] = \
7891 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7895 # Required free disk space as a function of disk and swap space
7897 constants.DT_DISKLESS: {},
7898 constants.DT_PLAIN: _compute(disks, 0),
7899 # 128 MB is added per disk for DRBD metadata
7900 constants.DT_DRBD8: _compute(disks, 128),
7901 constants.DT_FILE: {},
7902 constants.DT_SHARED_FILE: {},
7905 if disk_template not in req_size_dict:
7906 raise errors.ProgrammerError("Disk template '%s' size requirement"
7907 " is unknown" % disk_template)
7909 return req_size_dict[disk_template]
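# Illustrative example: for two DRBD8 disks of 1024 MiB and 2048 MiB in
# volume group "xenvg", the per-VG requirement is
# {"xenvg": (1024 + 128) + (2048 + 128)} = {"xenvg": 3328}.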
7912 def _ComputeDiskSize(disk_template, disks):
7913 """Compute disk size requirements in the volume group
7916 # Required free disk space as a function of disk and swap space
7918 constants.DT_DISKLESS: None,
7919 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7920 # 128 MB is added per disk for DRBD metadata
7921 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7922 constants.DT_FILE: None,
7923 constants.DT_SHARED_FILE: 0,
7924 constants.DT_BLOCK: 0,
7927 if disk_template not in req_size_dict:
7928 raise errors.ProgrammerError("Disk template '%s' size requirement"
7929 " is unknown" % disk_template)
7931 return req_size_dict[disk_template]
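# Illustrative example: for the same two disks (1024 and 2048 MiB),
# _ComputeDiskSize returns 3072 for DT_PLAIN, 3328 for DT_DRBD8 (128 MiB of
# metadata per disk), None for DT_DISKLESS and DT_FILE, and 0 for
# DT_SHARED_FILE and DT_BLOCK.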
7934 def _FilterVmNodes(lu, nodenames):
7935 """Filters out non-vm_capable nodes from a list.
7937 @type lu: L{LogicalUnit}
7938 @param lu: the logical unit for which we check
7939 @type nodenames: list
7940 @param nodenames: the list of nodes on which we should check
7942 @return: the list of vm-capable nodes
7945 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7946 return [name for name in nodenames if name not in vm_nodes]
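# Example: if "node2" is marked as not vm_capable, _FilterVmNodes(lu,
# ["node1", "node2"]) returns ["node1"].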
7949 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7950 """Hypervisor parameter validation.
7952 This function abstracts the hypervisor parameter validation to be
7953 used in both instance create and instance modify.
7955 @type lu: L{LogicalUnit}
7956 @param lu: the logical unit for which we check
7957 @type nodenames: list
7958 @param nodenames: the list of nodes on which we should check
7959 @type hvname: string
7960 @param hvname: the name of the hypervisor we should use
7961 @type hvparams: dict
7962 @param hvparams: the parameters which we need to check
7963 @raise errors.OpPrereqError: if the parameters are not valid
7966 nodenames = _FilterVmNodes(lu, nodenames)
7967 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7970 for node in nodenames:
7974 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7977 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7978 """OS parameters validation.
7980 @type lu: L{LogicalUnit}
7981 @param lu: the logical unit for which we check
7982 @type required: boolean
7983 @param required: whether the validation should fail if the OS is not found
7985 @type nodenames: list
7986 @param nodenames: the list of nodes on which we should check
7987 @type osname: string
7988 @param osname: the name of the OS we should use
7989 @type osparams: dict
7990 @param osparams: the parameters which we need to check
7991 @raise errors.OpPrereqError: if the parameters are not valid
7994 nodenames = _FilterVmNodes(lu, nodenames)
7995 result = lu.rpc.call_os_validate(required, nodenames, osname,
7996 [constants.OS_VALIDATE_PARAMETERS],
7998 for node, nres in result.items():
7999 # we don't check for offline cases since this should be run only
8000 # against the master node and/or an instance's nodes
8001 nres.Raise("OS Parameters validation failed on node %s" % node)
8002 if not nres.payload:
8003 lu.LogInfo("OS %s not found on node %s, validation skipped",
8007 class LUInstanceCreate(LogicalUnit):
8008 """Create an instance.
8011 HPATH = "instance-add"
8012 HTYPE = constants.HTYPE_INSTANCE
8015 def CheckArguments(self):
8019 # do not require name_check to ease forward/backward compatibility
8021 if self.op.no_install and self.op.start:
8022 self.LogInfo("No-installation mode selected, disabling startup")
8023 self.op.start = False
8024 # validate/normalize the instance name
8025 self.op.instance_name = \
8026 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8028 if self.op.ip_check and not self.op.name_check:
8029 # TODO: make the ip check more flexible and not depend on the name check
8030 raise errors.OpPrereqError("Cannot do IP address check without a name"
8031 " check", errors.ECODE_INVAL)
8033 # check nics' parameter names
8034 for nic in self.op.nics:
8035 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8037 # check disks: parameter names and consistent adopt/no-adopt strategy
8038 has_adopt = has_no_adopt = False
8039 for disk in self.op.disks:
8040 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8041 if constants.IDISK_ADOPT in disk:
8045 if has_adopt and has_no_adopt:
8046 raise errors.OpPrereqError("Either all disks are adopted or none is",
8049 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8050 raise errors.OpPrereqError("Disk adoption is not supported for the"
8051 " '%s' disk template" %
8052 self.op.disk_template,
8054 if self.op.iallocator is not None:
8055 raise errors.OpPrereqError("Disk adoption not allowed with an"
8056 " iallocator script", errors.ECODE_INVAL)
8057 if self.op.mode == constants.INSTANCE_IMPORT:
8058 raise errors.OpPrereqError("Disk adoption not allowed for"
8059 " instance import", errors.ECODE_INVAL)
8061 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8062 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8063 " but no 'adopt' parameter given" %
8064 self.op.disk_template,
8067 self.adopt_disks = has_adopt
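# Illustration (the literal values shown are assumed for clarity): a
# plain-template adoption request passes something like
#   disks=[{constants.IDISK_SIZE: 1024, constants.IDISK_ADOPT: "existing-lv"}]
# for every disk; mixing adopting and non-adopting disks is rejected above,
# as is adoption combined with an iallocator or an instance import.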
8069 # instance name verification
8070 if self.op.name_check:
8071 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8072 self.op.instance_name = self.hostname1.name
8073 # used in CheckPrereq for ip ping check
8074 self.check_ip = self.hostname1.ip
8076 self.check_ip = None
8078 # file storage checks
8079 if (self.op.file_driver and
8080 not self.op.file_driver in constants.FILE_DRIVER):
8081 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8082 self.op.file_driver, errors.ECODE_INVAL)
8084 if self.op.disk_template == constants.DT_FILE:
8085 opcodes.RequireFileStorage()
8086 elif self.op.disk_template == constants.DT_SHARED_FILE:
8087 opcodes.RequireSharedFileStorage()
8089 ### Node/iallocator related checks
8090 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8092 if self.op.pnode is not None:
8093 if self.op.disk_template in constants.DTS_INT_MIRROR:
8094 if self.op.snode is None:
8095 raise errors.OpPrereqError("The networked disk templates need"
8096 " a mirror node", errors.ECODE_INVAL)
8098 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8100 self.op.snode = None
8102 self._cds = _GetClusterDomainSecret()
8104 if self.op.mode == constants.INSTANCE_IMPORT:
8105 # On import force_variant must be True, because if we forced it at
8106 # initial install, our only chance when importing it back is that it works again
8108 self.op.force_variant = True
8110 if self.op.no_install:
8111 self.LogInfo("No-installation mode has no effect during import")
8113 elif self.op.mode == constants.INSTANCE_CREATE:
8114 if self.op.os_type is None:
8115 raise errors.OpPrereqError("No guest OS specified",
8117 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8118 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8119 " installation" % self.op.os_type,
8121 if self.op.disk_template is None:
8122 raise errors.OpPrereqError("No disk template specified",
8125 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8126 # Check handshake to ensure both clusters have the same domain secret
8127 src_handshake = self.op.source_handshake
8128 if not src_handshake:
8129 raise errors.OpPrereqError("Missing source handshake",
8132 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8135 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8138 # Load and check source CA
8139 self.source_x509_ca_pem = self.op.source_x509_ca
8140 if not self.source_x509_ca_pem:
8141 raise errors.OpPrereqError("Missing source X509 CA",
8145 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8147 except OpenSSL.crypto.Error, err:
8148 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8149 (err, ), errors.ECODE_INVAL)
8151 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8152 if errcode is not None:
8153 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8156 self.source_x509_ca = cert
8158 src_instance_name = self.op.source_instance_name
8159 if not src_instance_name:
8160 raise errors.OpPrereqError("Missing source instance name",
8163 self.source_instance_name = \
8164 netutils.GetHostname(name=src_instance_name).name
8167 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8168 self.op.mode, errors.ECODE_INVAL)
8170 def ExpandNames(self):
8171 """ExpandNames for CreateInstance.
8173 Figure out the right locks for instance creation.
8176 self.needed_locks = {}
8178 instance_name = self.op.instance_name
8179 # this is just a preventive check, but someone might still add this
8180 # instance in the meantime, and creation will fail at lock-add time
8181 if instance_name in self.cfg.GetInstanceList():
8182 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8183 instance_name, errors.ECODE_EXISTS)
8185 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8187 if self.op.iallocator:
8188 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8190 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8191 nodelist = [self.op.pnode]
8192 if self.op.snode is not None:
8193 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8194 nodelist.append(self.op.snode)
8195 self.needed_locks[locking.LEVEL_NODE] = nodelist
8197 # in case of import, lock the source node too
8198 if self.op.mode == constants.INSTANCE_IMPORT:
8199 src_node = self.op.src_node
8200 src_path = self.op.src_path
8202 if src_path is None:
8203 self.op.src_path = src_path = self.op.instance_name
8205 if src_node is None:
8206 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8207 self.op.src_node = None
8208 if os.path.isabs(src_path):
8209 raise errors.OpPrereqError("Importing an instance from an absolute"
8210 " path requires a source node option",
8213 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8214 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8215 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8216 if not os.path.isabs(src_path):
8217 self.op.src_path = src_path = \
8218 utils.PathJoin(constants.EXPORT_DIR, src_path)
8220 def _RunAllocator(self):
8221 """Run the allocator based on input opcode.
8224 nics = [n.ToDict() for n in self.nics]
8225 ial = IAllocator(self.cfg, self.rpc,
8226 mode=constants.IALLOCATOR_MODE_ALLOC,
8227 name=self.op.instance_name,
8228 disk_template=self.op.disk_template,
8231 vcpus=self.be_full[constants.BE_VCPUS],
8232 memory=self.be_full[constants.BE_MEMORY],
8235 hypervisor=self.op.hypervisor,
8238 ial.Run(self.op.iallocator)
8241 raise errors.OpPrereqError("Can't compute nodes using"
8242 " iallocator '%s': %s" %
8243 (self.op.iallocator, ial.info),
8245 if len(ial.result) != ial.required_nodes:
8246 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8247 " of nodes (%s), required %s" %
8248 (self.op.iallocator, len(ial.result),
8249 ial.required_nodes), errors.ECODE_FAULT)
8250 self.op.pnode = ial.result[0]
8251 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8252 self.op.instance_name, self.op.iallocator,
8253 utils.CommaJoin(ial.result))
8254 if ial.required_nodes == 2:
8255 self.op.snode = ial.result[1]
8257 def BuildHooksEnv(self):
8260 This runs on master, primary and secondary nodes of the instance.
8264 "ADD_MODE": self.op.mode,
8266 if self.op.mode == constants.INSTANCE_IMPORT:
8267 env["SRC_NODE"] = self.op.src_node
8268 env["SRC_PATH"] = self.op.src_path
8269 env["SRC_IMAGES"] = self.src_images
8271 env.update(_BuildInstanceHookEnv(
8272 name=self.op.instance_name,
8273 primary_node=self.op.pnode,
8274 secondary_nodes=self.secondaries,
8275 status=self.op.start,
8276 os_type=self.op.os_type,
8277 memory=self.be_full[constants.BE_MEMORY],
8278 vcpus=self.be_full[constants.BE_VCPUS],
8279 nics=_NICListToTuple(self, self.nics),
8280 disk_template=self.op.disk_template,
8281 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8282 for d in self.disks],
8285 hypervisor_name=self.op.hypervisor,
8291 def BuildHooksNodes(self):
8292 """Build hooks nodes.
8295 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8298 def _ReadExportInfo(self):
8299 """Reads the export information from disk.
8301 It will override the opcode source node and path with the actual
8302 information, if these two were not specified before.
8304 @return: the export information
8307 assert self.op.mode == constants.INSTANCE_IMPORT
8309 src_node = self.op.src_node
8310 src_path = self.op.src_path
8312 if src_node is None:
8313 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8314 exp_list = self.rpc.call_export_list(locked_nodes)
8316 for node in exp_list:
8317 if exp_list[node].fail_msg:
8319 if src_path in exp_list[node].payload:
8321 self.op.src_node = src_node = node
8322 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8326 raise errors.OpPrereqError("No export found for relative path %s" %
8327 src_path, errors.ECODE_INVAL)
8329 _CheckNodeOnline(self, src_node)
8330 result = self.rpc.call_export_info(src_node, src_path)
8331 result.Raise("No export or invalid export found in dir %s" % src_path)
8333 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8334 if not export_info.has_section(constants.INISECT_EXP):
8335 raise errors.ProgrammerError("Corrupted export config",
8336 errors.ECODE_ENVIRON)
8338 ei_version = export_info.get(constants.INISECT_EXP, "version")
8339 if (int(ei_version) != constants.EXPORT_VERSION):
8340 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8341 (ei_version, constants.EXPORT_VERSION),
8342 errors.ECODE_ENVIRON)
8345 def _ReadExportParams(self, einfo):
8346 """Use export parameters as defaults.
8348 In case the opcode doesn't specify (as in override) some instance
8349 parameters, then try to use them from the export information, if that declares them.
8353 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8355 if self.op.disk_template is None:
8356 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8357 self.op.disk_template = einfo.get(constants.INISECT_INS,
8360 raise errors.OpPrereqError("No disk template specified and the export"
8361 " is missing the disk_template information",
8364 if not self.op.disks:
8365 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8367 # TODO: import the disk iv_name too
8368 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8369 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8370 disks.append({constants.IDISK_SIZE: disk_sz})
8371 self.op.disks = disks
8373 raise errors.OpPrereqError("No disk info specified and the export"
8374 " is missing the disk information",
8377 if (not self.op.nics and
8378 einfo.has_option(constants.INISECT_INS, "nic_count")):
8380 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8382 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8383 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8388 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8389 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8391 if (self.op.hypervisor is None and
8392 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8393 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8395 if einfo.has_section(constants.INISECT_HYP):
8396 # use the export parameters but do not override the ones
8397 # specified by the user
8398 for name, value in einfo.items(constants.INISECT_HYP):
8399 if name not in self.op.hvparams:
8400 self.op.hvparams[name] = value
8402 if einfo.has_section(constants.INISECT_BEP):
8403 # use the parameters, without overriding
8404 for name, value in einfo.items(constants.INISECT_BEP):
8405 if name not in self.op.beparams:
8406 self.op.beparams[name] = value
8408 # try to read the parameters old style, from the main section
8409 for name in constants.BES_PARAMETERS:
8410 if (name not in self.op.beparams and
8411 einfo.has_option(constants.INISECT_INS, name)):
8412 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8414 if einfo.has_section(constants.INISECT_OSP):
8415 # use the parameters, without overriding
8416 for name, value in einfo.items(constants.INISECT_OSP):
8417 if name not in self.op.osparams:
8418 self.op.osparams[name] = value
8420 def _RevertToDefaults(self, cluster):
8421 """Revert the instance parameters to the default values.
8425 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8426 for name in self.op.hvparams.keys():
8427 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8428 del self.op.hvparams[name]
8430 be_defs = cluster.SimpleFillBE({})
8431 for name in self.op.beparams.keys():
8432 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8433 del self.op.beparams[name]
8435 nic_defs = cluster.SimpleFillNIC({})
8436 for nic in self.op.nics:
8437 for name in constants.NICS_PARAMETERS:
8438 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8441 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8442 for name in self.op.osparams.keys():
8443 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8444 del self.op.osparams[name]
8446 def _CalculateFileStorageDir(self):
8447 """Calculate final instance file storage dir.
8450 # file storage dir calculation/check
8451 self.instance_file_storage_dir = None
8452 if self.op.disk_template in constants.DTS_FILEBASED:
8453 # build the full file storage dir path
8456 if self.op.disk_template == constants.DT_SHARED_FILE:
8457 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8459 get_fsd_fn = self.cfg.GetFileStorageDir
8461 cfg_storagedir = get_fsd_fn()
8462 if not cfg_storagedir:
8463 raise errors.OpPrereqError("Cluster file storage dir not defined")
8464 joinargs.append(cfg_storagedir)
8466 if self.op.file_storage_dir is not None:
8467 joinargs.append(self.op.file_storage_dir)
8469 joinargs.append(self.op.instance_name)
8471 # pylint: disable-msg=W0142
8472 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
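# Illustration (paths assumed): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir "websrv" and an instance
# named "inst1.example.com", the resulting directory would be
# "/srv/ganeti/file-storage/websrv/inst1.example.com".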
8474 def CheckPrereq(self):
8475 """Check prerequisites.
8478 self._CalculateFileStorageDir()
8480 if self.op.mode == constants.INSTANCE_IMPORT:
8481 export_info = self._ReadExportInfo()
8482 self._ReadExportParams(export_info)
8484 if (not self.cfg.GetVGName() and
8485 self.op.disk_template not in constants.DTS_NOT_LVM):
8486 raise errors.OpPrereqError("Cluster does not support lvm-based"
8487 " instances", errors.ECODE_STATE)
8489 if self.op.hypervisor is None:
8490 self.op.hypervisor = self.cfg.GetHypervisorType()
8492 cluster = self.cfg.GetClusterInfo()
8493 enabled_hvs = cluster.enabled_hypervisors
8494 if self.op.hypervisor not in enabled_hvs:
8495 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8496 " cluster (%s)" % (self.op.hypervisor,
8497 ",".join(enabled_hvs)),
8500 # Check tag validity
8501 for tag in self.op.tags:
8502 objects.TaggableObject.ValidateTag(tag)
8504 # check hypervisor parameter syntax (locally)
8505 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8506 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8508 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8509 hv_type.CheckParameterSyntax(filled_hvp)
8510 self.hv_full = filled_hvp
8511 # check that we don't specify global parameters on an instance
8512 _CheckGlobalHvParams(self.op.hvparams)
8514 # fill and remember the beparams dict
8515 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8516 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8518 # build os parameters
8519 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8521 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
8523 if self.op.identify_defaults:
8524 self._RevertToDefaults(cluster)
8528 for idx, nic in enumerate(self.op.nics):
8529 nic_mode_req = nic.get(constants.INIC_MODE, None)
8530 nic_mode = nic_mode_req
8531 if nic_mode is None:
8532 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8534 # in routed mode, for the first nic, the default ip is 'auto'
8535 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8536 default_ip_mode = constants.VALUE_AUTO
8538 default_ip_mode = constants.VALUE_NONE
8540 # ip validity checks
8541 ip = nic.get(constants.INIC_IP, default_ip_mode)
8542 if ip is None or ip.lower() == constants.VALUE_NONE:
8544 elif ip.lower() == constants.VALUE_AUTO:
8545 if not self.op.name_check:
8546 raise errors.OpPrereqError("IP address set to auto but name checks"
8547 " have been skipped",
8549 nic_ip = self.hostname1.ip
8551 if not netutils.IPAddress.IsValid(ip):
8552 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8556 # TODO: check the ip address for uniqueness
8557 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8558 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8561 # MAC address verification
8562 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8563 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8564 mac = utils.NormalizeAndValidateMac(mac)
8567 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8568 except errors.ReservationError:
8569 raise errors.OpPrereqError("MAC address %s already in use"
8570 " in cluster" % mac,
8571 errors.ECODE_NOTUNIQUE)
8573 # Build nic parameters
8574 link = nic.get(constants.INIC_LINK, None)
8577 nicparams[constants.NIC_MODE] = nic_mode_req
8579 nicparams[constants.NIC_LINK] = link
8581 check_params = cluster.SimpleFillNIC(nicparams)
8582 objects.NIC.CheckParameterSyntax(check_params)
8583 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8585 # disk checks/pre-build
8586 default_vg = self.cfg.GetVGName()
8588 for disk in self.op.disks:
8589 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8590 if mode not in constants.DISK_ACCESS_SET:
8591 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8592 mode, errors.ECODE_INVAL)
8593 size = disk.get(constants.IDISK_SIZE, None)
8595 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8598 except (TypeError, ValueError):
8599 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8602 data_vg = disk.get(constants.IDISK_VG, default_vg)
8604 constants.IDISK_SIZE: size,
8605 constants.IDISK_MODE: mode,
8606 constants.IDISK_VG: data_vg,
8607 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8609 if constants.IDISK_ADOPT in disk:
8610 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8611 self.disks.append(new_disk)
8613 if self.op.mode == constants.INSTANCE_IMPORT:
8615 # Check that the new instance doesn't have fewer disks than the export
8616 instance_disks = len(self.disks)
8617 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8618 if instance_disks < export_disks:
8619 raise errors.OpPrereqError("Not enough disks to import."
8620 " (instance: %d, export: %d)" %
8621 (instance_disks, export_disks),
8625 for idx in range(export_disks):
8626 option = "disk%d_dump" % idx
8627 if export_info.has_option(constants.INISECT_INS, option):
8628 # FIXME: are the old os-es, disk sizes, etc. useful?
8629 export_name = export_info.get(constants.INISECT_INS, option)
8630 image = utils.PathJoin(self.op.src_path, export_name)
8631 disk_images.append(image)
8633 disk_images.append(False)
8635 self.src_images = disk_images
8637 old_name = export_info.get(constants.INISECT_INS, "name")
8639 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8640 except (TypeError, ValueError), err:
8641 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8642 " an integer: %s" % str(err),
8644 if self.op.instance_name == old_name:
8645 for idx, nic in enumerate(self.nics):
8646 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8647 nic_mac_ini = "nic%d_mac" % idx
8648 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8650 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8652 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8653 if self.op.ip_check:
8654 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8655 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8656 (self.check_ip, self.op.instance_name),
8657 errors.ECODE_NOTUNIQUE)
8659 #### mac address generation
8660 # By generating the MAC address here, both the allocator and the hooks get
8661 # the real, final MAC address rather than the 'auto' or 'generate' value.
8662 # There is a race condition between the generation and the instance object
8663 # creation, which means that we know the mac is valid now, but we're not
8664 # sure it will be when we actually add the instance. If things go bad
8665 # adding the instance will abort because of a duplicate mac, and the
8666 # creation job will fail.
8667 for nic in self.nics:
8668 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8669 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8673 if self.op.iallocator is not None:
8674 self._RunAllocator()
8676 #### node related checks
8678 # check primary node
8679 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8680 assert self.pnode is not None, \
8681 "Cannot retrieve locked node %s" % self.op.pnode
8683 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8684 pnode.name, errors.ECODE_STATE)
8686 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8687 pnode.name, errors.ECODE_STATE)
8688 if not pnode.vm_capable:
8689 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8690 " '%s'" % pnode.name, errors.ECODE_STATE)
8692 self.secondaries = []
8694 # mirror node verification
8695 if self.op.disk_template in constants.DTS_INT_MIRROR:
8696 if self.op.snode == pnode.name:
8697 raise errors.OpPrereqError("The secondary node cannot be the"
8698 " primary node", errors.ECODE_INVAL)
8699 _CheckNodeOnline(self, self.op.snode)
8700 _CheckNodeNotDrained(self, self.op.snode)
8701 _CheckNodeVmCapable(self, self.op.snode)
8702 self.secondaries.append(self.op.snode)
8704 nodenames = [pnode.name] + self.secondaries
8706 if not self.adopt_disks:
8707 # Check lv size requirements, if not adopting
8708 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8709 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8711 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8712 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8713 disk[constants.IDISK_ADOPT])
8714 for disk in self.disks])
8715 if len(all_lvs) != len(self.disks):
8716 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8718 for lv_name in all_lvs:
8720 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8721 # to ReserveLV use the same syntax
8722 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8723 except errors.ReservationError:
8724 raise errors.OpPrereqError("LV named %s used by another instance" %
8725 lv_name, errors.ECODE_NOTUNIQUE)
8727 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8728 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8730 node_lvs = self.rpc.call_lv_list([pnode.name],
8731 vg_names.payload.keys())[pnode.name]
8732 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8733 node_lvs = node_lvs.payload
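# node_lvs maps "vg/lv" names to per-LV data; index 0 is used below as the
# size (in MiB) and index 2 as the "online" (in use) flag.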
8735 delta = all_lvs.difference(node_lvs.keys())
8737 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8738 utils.CommaJoin(delta),
8740 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8742 raise errors.OpPrereqError("Online logical volumes found, cannot"
8743 " adopt: %s" % utils.CommaJoin(online_lvs),
8745 # update the size of disk based on what is found
8746 for dsk in self.disks:
8747 dsk[constants.IDISK_SIZE] = \
8748 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8749 dsk[constants.IDISK_ADOPT])][0]))
8751 elif self.op.disk_template == constants.DT_BLOCK:
8752 # Normalize and de-duplicate device paths
8753 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8754 for disk in self.disks])
8755 if len(all_disks) != len(self.disks):
8756 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8758 baddisks = [d for d in all_disks
8759 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8761 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8762 " cannot be adopted" %
8763 (", ".join(baddisks),
8764 constants.ADOPTABLE_BLOCKDEV_ROOT),
8767 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8768 list(all_disks))[pnode.name]
8769 node_disks.Raise("Cannot get block device information from node %s" %
8771 node_disks = node_disks.payload
8772 delta = all_disks.difference(node_disks.keys())
8774 raise errors.OpPrereqError("Missing block device(s): %s" %
8775 utils.CommaJoin(delta),
8777 for dsk in self.disks:
8778 dsk[constants.IDISK_SIZE] = \
8779 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8781 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8783 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8784 # check OS parameters (remotely)
8785 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8787 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8789 # memory check on primary node
8791 _CheckNodeFreeMemory(self, self.pnode.name,
8792 "creating instance %s" % self.op.instance_name,
8793 self.be_full[constants.BE_MEMORY],
8796 self.dry_run_result = list(nodenames)
8798 def Exec(self, feedback_fn):
8799 """Create and add the instance to the cluster.
8802 instance = self.op.instance_name
8803 pnode_name = self.pnode.name
8805 ht_kind = self.op.hypervisor
8806 if ht_kind in constants.HTS_REQ_PORT:
8807 network_port = self.cfg.AllocatePort()
8811 disks = _GenerateDiskTemplate(self,
8812 self.op.disk_template,
8813 instance, pnode_name,
8816 self.instance_file_storage_dir,
8817 self.op.file_driver,
8821 iobj = objects.Instance(name=instance, os=self.op.os_type,
8822 primary_node=pnode_name,
8823 nics=self.nics, disks=disks,
8824 disk_template=self.op.disk_template,
8826 network_port=network_port,
8827 beparams=self.op.beparams,
8828 hvparams=self.op.hvparams,
8829 hypervisor=self.op.hypervisor,
8830 osparams=self.op.osparams,
8834 for tag in self.op.tags:
8837 if self.adopt_disks:
8838 if self.op.disk_template == constants.DT_PLAIN:
8839 # rename LVs to the newly-generated names; we need to construct
8840 # 'fake' LV disks with the old data, plus the new unique_id
8841 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8843 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8844 rename_to.append(t_dsk.logical_id)
8845 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8846 self.cfg.SetDiskID(t_dsk, pnode_name)
8847 result = self.rpc.call_blockdev_rename(pnode_name,
8848 zip(tmp_disks, rename_to))
8849 result.Raise("Failed to rename adopted LVs")
8851 feedback_fn("* creating instance disks...")
8853 _CreateDisks(self, iobj)
8854 except errors.OpExecError:
8855 self.LogWarning("Device creation failed, reverting...")
8857 _RemoveDisks(self, iobj)
8859 self.cfg.ReleaseDRBDMinors(instance)
8862 feedback_fn("adding instance %s to cluster config" % instance)
8864 self.cfg.AddInstance(iobj, self.proc.GetECId())
8866 # Declare that we don't want to remove the instance lock anymore, as we've
8867 # added the instance to the config
8868 del self.remove_locks[locking.LEVEL_INSTANCE]
8870 if self.op.mode == constants.INSTANCE_IMPORT:
8871 # Release unused nodes
8872 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8875 _ReleaseLocks(self, locking.LEVEL_NODE)
8878 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8879 feedback_fn("* wiping instance disks...")
8881 _WipeDisks(self, iobj)
8882 except errors.OpExecError, err:
8883 logging.exception("Wiping disks failed")
8884 self.LogWarning("Wiping instance disks failed (%s)", err)
8888 # Something is already wrong with the disks, don't do anything else
8890 elif self.op.wait_for_sync:
8891 disk_abort = not _WaitForSync(self, iobj)
8892 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8893 # make sure the disks are not degraded (still sync-ing is ok)
8894 feedback_fn("* checking mirrors status")
8895 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8900 _RemoveDisks(self, iobj)
8901 self.cfg.RemoveInstance(iobj.name)
8902 # Make sure the instance lock gets removed
8903 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8904 raise errors.OpExecError("There are some degraded disks for"
8907 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8908 if self.op.mode == constants.INSTANCE_CREATE:
8909 if not self.op.no_install:
8910 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8911 not self.op.wait_for_sync)
8913 feedback_fn("* pausing disk sync to install instance OS")
8914 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8916 for idx, success in enumerate(result.payload):
8918 logging.warn("pause-sync of instance %s for disk %d failed",
8921 feedback_fn("* running the instance OS create scripts...")
8922 # FIXME: pass debug option from opcode to backend
8923 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8924 self.op.debug_level)
8926 feedback_fn("* resuming disk sync")
8927 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8929 for idx, success in enumerate(result.payload):
8931 logging.warn("resume-sync of instance %s for disk %d failed",
8934 result.Raise("Could not add os for instance %s"
8935 " on node %s" % (instance, pnode_name))
8937 elif self.op.mode == constants.INSTANCE_IMPORT:
8938 feedback_fn("* running the instance OS import scripts...")
8942 for idx, image in enumerate(self.src_images):
8946 # FIXME: pass debug option from opcode to backend
8947 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8948 constants.IEIO_FILE, (image, ),
8949 constants.IEIO_SCRIPT,
8950 (iobj.disks[idx], idx),
8952 transfers.append(dt)
8955 masterd.instance.TransferInstanceData(self, feedback_fn,
8956 self.op.src_node, pnode_name,
8957 self.pnode.secondary_ip,
8959 if not compat.all(import_result):
8960 self.LogWarning("Some disks for instance %s on node %s were not"
8961 " imported successfully" % (instance, pnode_name))
8963 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8964 feedback_fn("* preparing remote import...")
8965 # The source cluster will stop the instance before attempting to make a
8966 # connection. In some cases stopping an instance can take a long time,
8967 # hence the shutdown timeout is added to the connection timeout.
8968 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8969 self.op.source_shutdown_timeout)
8970 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8972 assert iobj.primary_node == self.pnode.name
8974 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8975 self.source_x509_ca,
8976 self._cds, timeouts)
8977 if not compat.all(disk_results):
8978 # TODO: Should the instance still be started, even if some disks
8979 # failed to import (valid for local imports, too)?
8980 self.LogWarning("Some disks for instance %s on node %s were not"
8981 " imported successfully" % (instance, pnode_name))
8983 # Run rename script on newly imported instance
8984 assert iobj.name == instance
8985 feedback_fn("Running rename script for %s" % instance)
8986 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8987 self.source_instance_name,
8988 self.op.debug_level)
8990 self.LogWarning("Failed to run rename script for %s on node"
8991 " %s: %s" % (instance, pnode_name, result.fail_msg))
8994 # also checked in the prereq part
8995 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8999 iobj.admin_up = True
9000 self.cfg.Update(iobj, feedback_fn)
9001 logging.info("Starting instance %s on node %s", instance, pnode_name)
9002 feedback_fn("* starting instance...")
9003 result = self.rpc.call_instance_start(pnode_name, iobj,
9005 result.Raise("Could not start instance")
9007 return list(iobj.all_nodes)
9010 class LUInstanceConsole(NoHooksLU):
9011 """Connect to an instance's console.
9013 This is somewhat special in that it returns the command line that
9014 you need to run on the master node in order to connect to the console.
9020 def ExpandNames(self):
9021 self._ExpandAndLockInstance()
9023 def CheckPrereq(self):
9024 """Check prerequisites.
9026 This checks that the instance is in the cluster.
9029 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9030 assert self.instance is not None, \
9031 "Cannot retrieve locked instance %s" % self.op.instance_name
9032 _CheckNodeOnline(self, self.instance.primary_node)
9034 def Exec(self, feedback_fn):
9035 """Connect to the console of an instance
9038 instance = self.instance
9039 node = instance.primary_node
9041 node_insts = self.rpc.call_instance_list([node],
9042 [instance.hypervisor])[node]
9043 node_insts.Raise("Can't get node information from %s" % node)
9045 if instance.name not in node_insts.payload:
9046 if instance.admin_up:
9047 state = constants.INSTST_ERRORDOWN
9049 state = constants.INSTST_ADMINDOWN
9050 raise errors.OpExecError("Instance %s is not running (state %s)" %
9051 (instance.name, state))
9053 logging.debug("Connecting to console of %s on %s", instance.name, node)
9055 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9058 def _GetInstanceConsole(cluster, instance):
9059 """Returns console information for an instance.
9061 @type cluster: L{objects.Cluster}
9062 @type instance: L{objects.Instance}
9066 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9067 # beparams and hvparams are passed separately, to avoid editing the
9068 # instance and then saving the defaults in the instance itself.
9069 hvparams = cluster.FillHV(instance)
9070 beparams = cluster.FillBE(instance)
9071 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9073 assert console.instance == instance.name
9074 assert console.Validate()
9076 return console.ToDict()
9079 class LUInstanceReplaceDisks(LogicalUnit):
9080 """Replace the disks of an instance.
9083 HPATH = "mirrors-replace"
9084 HTYPE = constants.HTYPE_INSTANCE
9087 def CheckArguments(self):
9088 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9091 def ExpandNames(self):
9092 self._ExpandAndLockInstance()
9094 assert locking.LEVEL_NODE not in self.needed_locks
9095 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9097 assert self.op.iallocator is None or self.op.remote_node is None, \
9098 "Conflicting options"
9100 if self.op.remote_node is not None:
9101 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9103 # Warning: do not remove the locking of the new secondary here
9104 # unless DRBD8.AddChildren is changed to work in parallel;
9105 # currently it doesn't since parallel invocations of
9106 # FindUnusedMinor will conflict
9107 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9108 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9110 self.needed_locks[locking.LEVEL_NODE] = []
9111 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9113 if self.op.iallocator is not None:
9114 # iallocator will select a new node in the same group
9115 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9117 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9118 self.op.iallocator, self.op.remote_node,
9119 self.op.disks, False, self.op.early_release)
9121 self.tasklets = [self.replacer]
9123 def DeclareLocks(self, level):
9124 if level == locking.LEVEL_NODEGROUP:
9125 assert self.op.remote_node is None
9126 assert self.op.iallocator is not None
9127 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9129 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9130 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9131 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9133 elif level == locking.LEVEL_NODE:
9134 if self.op.iallocator is not None:
9135 assert self.op.remote_node is None
9136 assert not self.needed_locks[locking.LEVEL_NODE]
9138 # Lock member nodes of all locked groups
9139 self.needed_locks[locking.LEVEL_NODE] = [node_name
9140 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9141 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9143 self._LockInstancesNodes()
9145 def BuildHooksEnv(self):
9148 This runs on the master, the primary and all the secondaries.
9151 instance = self.replacer.instance
9153 "MODE": self.op.mode,
9154 "NEW_SECONDARY": self.op.remote_node,
9155 "OLD_SECONDARY": instance.secondary_nodes[0],
9157 env.update(_BuildInstanceHookEnvByObject(self, instance))
9160 def BuildHooksNodes(self):
9161 """Build hooks nodes.
9164 instance = self.replacer.instance
9166 self.cfg.GetMasterNode(),
9167 instance.primary_node,
9169 if self.op.remote_node is not None:
9170 nl.append(self.op.remote_node)
9173 def CheckPrereq(self):
9174 """Check prerequisites.
9177 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9178 self.op.iallocator is None)
9180 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9182 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9184 return LogicalUnit.CheckPrereq(self)
9187 class TLReplaceDisks(Tasklet):
9188 """Replaces disks for an instance.
9190 Note: Locking is not within the scope of this class.
9193 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9194 disks, delay_iallocator, early_release):
9195 """Initializes this class.
9198 Tasklet.__init__(self, lu)
9201 self.instance_name = instance_name
9203 self.iallocator_name = iallocator_name
9204 self.remote_node = remote_node
9206 self.delay_iallocator = delay_iallocator
9207 self.early_release = early_release
9210 self.instance = None
9211 self.new_node = None
9212 self.target_node = None
9213 self.other_node = None
9214 self.remote_node_info = None
9215 self.node_secondary_ip = None
9218 def CheckArguments(mode, remote_node, iallocator):
9219 """Helper function for users of this class.
9222 # check for valid parameter combination
9223 if mode == constants.REPLACE_DISK_CHG:
9224 if remote_node is None and iallocator is None:
9225 raise errors.OpPrereqError("When changing the secondary either an"
9226 " iallocator script must be used or the"
9227 " new node given", errors.ECODE_INVAL)
9229 if remote_node is not None and iallocator is not None:
9230 raise errors.OpPrereqError("Give either the iallocator or the new"
9231 " secondary, not both", errors.ECODE_INVAL)
9233 elif remote_node is not None or iallocator is not None:
9234 # Not replacing the secondary
9235 raise errors.OpPrereqError("The iallocator and new node options can"
9236 " only be used when changing the"
9237 " secondary node", errors.ECODE_INVAL)
9240 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9241 """Compute a new secondary node using an IAllocator.
9244 ial = IAllocator(lu.cfg, lu.rpc,
9245 mode=constants.IALLOCATOR_MODE_RELOC,
9247 relocate_from=list(relocate_from))
9249 ial.Run(iallocator_name)
9252 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9253 " %s" % (iallocator_name, ial.info),
9256 if len(ial.result) != ial.required_nodes:
9257 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9258 " of nodes (%s), required %s" %
9260 len(ial.result), ial.required_nodes),
9263 remote_node_name = ial.result[0]
9265 lu.LogInfo("Selected new secondary for instance '%s': %s",
9266 instance_name, remote_node_name)
9268 return remote_node_name
9270 def _FindFaultyDisks(self, node_name):
9271 """Wrapper for L{_FindFaultyInstanceDisks}.
9274 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9277 def _CheckDisksActivated(self, instance):
9278 """Checks if the instance disks are activated.
9280 @param instance: The instance to check disks
9281 @return: True if they are activated, False otherwise
9284 nodes = instance.all_nodes
9286 for idx, dev in enumerate(instance.disks):
9288 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9289 self.cfg.SetDiskID(dev, node)
9291 result = self.rpc.call_blockdev_find(node, dev)
9295 elif result.fail_msg or not result.payload:
9300 def CheckPrereq(self):
9301 """Check prerequisites.
9303 This checks that the instance is in the cluster.
9306 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9307 assert instance is not None, \
9308 "Cannot retrieve locked instance %s" % self.instance_name
9310 if instance.disk_template != constants.DT_DRBD8:
9311 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9312 " instances", errors.ECODE_INVAL)
9314 if len(instance.secondary_nodes) != 1:
9315 raise errors.OpPrereqError("The instance has a strange layout,"
9316 " expected one secondary but found %d" %
9317 len(instance.secondary_nodes),
9320 if not self.delay_iallocator:
9321 self._CheckPrereq2()
9323 def _CheckPrereq2(self):
9324 """Check prerequisites, second part.
9326 This function should always be part of CheckPrereq. It was separated and is
9327 now called from Exec because during node evacuation the iallocator was only
9328 called with an unmodified cluster model, not taking planned changes into account.
9332 instance = self.instance
9333 secondary_node = instance.secondary_nodes[0]
9335 if self.iallocator_name is None:
9336 remote_node = self.remote_node
9338 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9339 instance.name, instance.secondary_nodes)
9341 if remote_node is None:
9342 self.remote_node_info = None
9344 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9345 "Remote node '%s' is not locked" % remote_node
9347 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9348 assert self.remote_node_info is not None, \
9349 "Cannot retrieve locked node %s" % remote_node
9351 if remote_node == self.instance.primary_node:
9352 raise errors.OpPrereqError("The specified node is the primary node of"
9353 " the instance", errors.ECODE_INVAL)
9355 if remote_node == secondary_node:
9356 raise errors.OpPrereqError("The specified node is already the"
9357 " secondary node of the instance",
9360 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9361 constants.REPLACE_DISK_CHG):
9362 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9365 if self.mode == constants.REPLACE_DISK_AUTO:
9366 if not self._CheckDisksActivated(instance):
9367 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9368 " first" % self.instance_name,
9370 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9371 faulty_secondary = self._FindFaultyDisks(secondary_node)
9373 if faulty_primary and faulty_secondary:
9374 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9375 " one node and can not be repaired"
9376 " automatically" % self.instance_name,
9380 self.disks = faulty_primary
9381 self.target_node = instance.primary_node
9382 self.other_node = secondary_node
9383 check_nodes = [self.target_node, self.other_node]
9384 elif faulty_secondary:
9385 self.disks = faulty_secondary
9386 self.target_node = secondary_node
9387 self.other_node = instance.primary_node
9388 check_nodes = [self.target_node, self.other_node]
9394 # Non-automatic modes
9395 if self.mode == constants.REPLACE_DISK_PRI:
9396 self.target_node = instance.primary_node
9397 self.other_node = secondary_node
9398 check_nodes = [self.target_node, self.other_node]
9400 elif self.mode == constants.REPLACE_DISK_SEC:
9401 self.target_node = secondary_node
9402 self.other_node = instance.primary_node
9403 check_nodes = [self.target_node, self.other_node]
9405 elif self.mode == constants.REPLACE_DISK_CHG:
9406 self.new_node = remote_node
9407 self.other_node = instance.primary_node
9408 self.target_node = secondary_node
9409 check_nodes = [self.new_node, self.other_node]
9411 _CheckNodeNotDrained(self.lu, remote_node)
9412 _CheckNodeVmCapable(self.lu, remote_node)
9414 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9415 assert old_node_info is not None
9416 if old_node_info.offline and not self.early_release:
9417 # doesn't make sense to delay the release
9418 self.early_release = True
9419 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9420 " early-release mode", secondary_node)
9423 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9426 # If not specified all disks should be replaced
9428 self.disks = range(len(self.instance.disks))
9430 for node in check_nodes:
9431 _CheckNodeOnline(self.lu, node)
9433 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9436 if node_name is not None)
9438 # Release unneeded node locks
9439 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9441 # Release any owned node group
9442 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9443 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9445 # Check whether disks are valid
9446 for disk_idx in self.disks:
9447 instance.FindDisk(disk_idx)
9449 # Get secondary node IP addresses
9450 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9451 in self.cfg.GetMultiNodeInfo(touched_nodes))
9453 def Exec(self, feedback_fn):
9454 """Execute disk replacement.
9456 This dispatches the disk replacement to the appropriate handler.
9459 if self.delay_iallocator:
9460 self._CheckPrereq2()
9463 # Verify owned locks before starting operation
9464 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9465 assert set(owned_nodes) == set(self.node_secondary_ip), \
9466 ("Incorrect node locks, owning %s, expected %s" %
9467 (owned_nodes, self.node_secondary_ip.keys()))
9469 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9470 assert list(owned_instances) == [self.instance_name], \
9471 "Instance '%s' not locked" % self.instance_name
9473 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9474 "Should not own any node group lock at this point"
9477 feedback_fn("No disks need replacement")
9480 feedback_fn("Replacing disk(s) %s for %s" %
9481 (utils.CommaJoin(self.disks), self.instance.name))
9483 activate_disks = (not self.instance.admin_up)
9485 # Activate the instance disks if we're replacing them on a down instance
9487 _StartInstanceDisks(self.lu, self.instance, True)
9490 # Should we replace the secondary node?
9491 if self.new_node is not None:
9492 fn = self._ExecDrbd8Secondary
9494 fn = self._ExecDrbd8DiskOnly
9496 result = fn(feedback_fn)
9498 # Deactivate the instance disks if we're replacing them on a
9501 _SafeShutdownInstanceDisks(self.lu, self.instance)
9504 # Verify owned locks
9505 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9506 nodes = frozenset(self.node_secondary_ip)
9507 assert ((self.early_release and not owned_nodes) or
9508 (not self.early_release and not (set(owned_nodes) - nodes))), \
9509 ("Not owning the correct locks, early_release=%s, owned=%r,"
9510 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9514 def _CheckVolumeGroup(self, nodes):
9515 self.lu.LogInfo("Checking volume groups")
9517 vgname = self.cfg.GetVGName()
9519 # Make sure volume group exists on all involved nodes
9520 results = self.rpc.call_vg_list(nodes)
9522 raise errors.OpExecError("Can't list volume groups on the nodes")
9526 res.Raise("Error checking node %s" % node)
9527 if vgname not in res.payload:
9528 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9531 def _CheckDisksExistence(self, nodes):
9532 # Check disk existence
9533 for idx, dev in enumerate(self.instance.disks):
9534 if idx not in self.disks:
9538 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9539 self.cfg.SetDiskID(dev, node)
9541 result = self.rpc.call_blockdev_find(node, dev)
9543 msg = result.fail_msg
9544 if msg or not result.payload:
9546 msg = "disk not found"
9547 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9550 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9551 for idx, dev in enumerate(self.instance.disks):
9552 if idx not in self.disks:
9555 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9558 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9560 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9561 " replace disks for instance %s" %
9562 (node_name, self.instance.name))
9564 def _CreateNewStorage(self, node_name):
9565 """Create new storage on the primary or secondary node.
9567 This is only used for same-node replaces, not for changing the
9568 secondary node, hence we don't want to modify the existing disk.
9573 for idx, dev in enumerate(self.instance.disks):
9574 if idx not in self.disks:
9577 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9579 self.cfg.SetDiskID(dev, node_name)
9581 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9582 names = _GenerateUniqueNames(self.lu, lv_names)
9584 vg_data = dev.children[0].logical_id[0]
9585 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9586 logical_id=(vg_data, names[0]))
9587 vg_meta = dev.children[1].logical_id[0]
9588 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9589 logical_id=(vg_meta, names[1]))
9591 new_lvs = [lv_data, lv_meta]
9592 old_lvs = [child.Copy() for child in dev.children]
9593 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
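# Descriptive note: iv_names maps the DRBD device name (e.g. "disk/0") to a
# (drbd_dev, old_lvs, new_lvs) tuple; _CheckDevices and _RemoveOldStorage
# below consume exactly this mapping.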
9595 # we pass force_create=True to force the LVM creation
9596 for new_lv in new_lvs:
9597 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9598 _GetInstanceInfoText(self.instance), False)
9602 def _CheckDevices(self, node_name, iv_names):
9603 for name, (dev, _, _) in iv_names.iteritems():
9604 self.cfg.SetDiskID(dev, node_name)
9606 result = self.rpc.call_blockdev_find(node_name, dev)
9608 msg = result.fail_msg
9609 if msg or not result.payload:
9611 msg = "disk not found"
9612 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9615 if result.payload.is_degraded:
9616 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9618 def _RemoveOldStorage(self, node_name, iv_names):
9619 for name, (_, old_lvs, _) in iv_names.iteritems():
9620 self.lu.LogInfo("Remove logical volumes for %s" % name)
9623 self.cfg.SetDiskID(lv, node_name)
9625 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9627 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9628 hint="remove unused LVs manually")
9630 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9631 """Replace a disk on the primary or secondary for DRBD 8.
9633 The algorithm for replace is quite complicated:
9635 1. for each disk to be replaced:
9637 1. create new LVs on the target node with unique names
9638 1. detach old LVs from the drbd device
9639 1. rename old LVs to name_replaced.<time_t>
9640 1. rename new LVs to old LVs
9641 1. attach the new LVs (with the old names now) to the drbd device
9643 1. wait for sync across all devices
9645 1. for each modified disk:
9647 1. remove old LVs (which have the name name_replaced.<time_t>)
9649 Failures are not very well handled.
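Illustrative walk-through (LV names are hypothetical): for disk/0 backed by a
data LV "xenvg/abc.disk0_data", a fresh pair of LVs is created under unique
names, the original is renamed to "abc.disk0_data_replaced-<time_t>", the new
LV is renamed to the original name and re-attached to the DRBD device, and the
"_replaced" volumes are deleted once the resync has finished.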
9654 # Step: check device activation
9655 self.lu.LogStep(1, steps_total, "Check device existence")
9656 self._CheckDisksExistence([self.other_node, self.target_node])
9657 self._CheckVolumeGroup([self.target_node, self.other_node])
9659 # Step: check other node consistency
9660 self.lu.LogStep(2, steps_total, "Check peer consistency")
9661 self._CheckDisksConsistency(self.other_node,
9662 self.other_node == self.instance.primary_node,
9665 # Step: create new storage
9666 self.lu.LogStep(3, steps_total, "Allocate new storage")
9667 iv_names = self._CreateNewStorage(self.target_node)
9669 # Step: for each lv, detach+rename*2+attach
9670 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9671 for dev, old_lvs, new_lvs in iv_names.itervalues():
9672 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9674 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9675 old_lvs)
9676 result.Raise("Can't detach drbd from local storage on node"
9677 " %s for device %s" % (self.target_node, dev.iv_name))
9679 #cfg.Update(instance)
9681 # ok, we created the new LVs, so now we know we have the needed
9682 # storage; as such, we proceed on the target node to rename
9683 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9684 # using the assumption that logical_id == physical_id (which in
9685 # turn is the unique_id on that node)
9687 # FIXME(iustin): use a better name for the replaced LVs
9688 temp_suffix = int(time.time())
9689 ren_fn = lambda d, suff: (d.physical_id[0],
9690 d.physical_id[1] + "_replaced-%s" % suff)
9692 # Build the rename list based on what LVs exist on the node
9693 rename_old_to_new = []
9694 for to_ren in old_lvs:
9695 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9696 if not result.fail_msg and result.payload:
9698 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9700 self.lu.LogInfo("Renaming the old LVs on the target node")
9701 result = self.rpc.call_blockdev_rename(self.target_node,
9702 rename_old_to_new)
9703 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9705 # Now we rename the new LVs to the old LVs
9706 self.lu.LogInfo("Renaming the new LVs on the target node")
9707 rename_new_to_old = [(new, old.physical_id)
9708 for old, new in zip(old_lvs, new_lvs)]
9709 result = self.rpc.call_blockdev_rename(self.target_node,
9710 rename_new_to_old)
9711 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9713 # Intermediate steps of in memory modifications
9714 for old, new in zip(old_lvs, new_lvs):
9715 new.logical_id = old.logical_id
9716 self.cfg.SetDiskID(new, self.target_node)
9718 # We need to modify old_lvs so that removal later removes the
9719 # right LVs, not the newly added ones; note that old_lvs is a
9721 for disk in old_lvs:
9722 disk.logical_id = ren_fn(disk, temp_suffix)
9723 self.cfg.SetDiskID(disk, self.target_node)
9725 # Now that the new lvs have the old name, we can add them to the device
9726 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9727 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9728 new_lvs)
9729 msg = result.fail_msg
9731 for new_lv in new_lvs:
9732 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9733 new_lv).fail_msg
9734 if msg2:
9735 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9736 hint=("cleanup manually the unused logical"
9738 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9741 if self.early_release:
9742 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9744 self._RemoveOldStorage(self.target_node, iv_names)
9745 # WARNING: we release both node locks here, do not do other RPCs
9746 # than WaitForSync to the primary node
9747 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9748 names=[self.target_node, self.other_node])
9751 # This can fail as the old devices are degraded and _WaitForSync
9752 # does a combined result over all disks, so we don't check its return value
9753 self.lu.LogStep(cstep, steps_total, "Sync devices")
9755 _WaitForSync(self.lu, self.instance)
9757 # Check all devices manually
9758 self._CheckDevices(self.instance.primary_node, iv_names)
9760 # Step: remove old storage
9761 if not self.early_release:
9762 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9764 self._RemoveOldStorage(self.target_node, iv_names)
9766 def _ExecDrbd8Secondary(self, feedback_fn):
9767 """Replace the secondary node for DRBD 8.
9769 The algorithm for replace is quite complicated:
9770 - for all disks of the instance:
9771 - create new LVs on the new node with same names
9772 - shutdown the drbd device on the old secondary
9773 - disconnect the drbd network on the primary
9774 - create the drbd device on the new secondary
9775 - network attach the drbd on the primary, using an artifice:
9776 the drbd code for Attach() will connect to the network if it
9777 finds a device which is connected to the good local disks but
9778 not network enabled
9779 - wait for sync across all devices
9780 - remove all disks from the old secondary
9782 Failures are not very well handled.
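Implementation note (summarising the code below): the primary's DRBD devices
are first switched to standalone via call_drbd_disconnect_net, the disks'
logical_id entries are rewritten to name the new secondary, and
call_drbd_attach_net then reconnects them against the freshly created devices
on the new node.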
9787 # Step: check device activation
9788 self.lu.LogStep(1, steps_total, "Check device existence")
9789 self._CheckDisksExistence([self.instance.primary_node])
9790 self._CheckVolumeGroup([self.instance.primary_node])
9792 # Step: check other node consistency
9793 self.lu.LogStep(2, steps_total, "Check peer consistency")
9794 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9796 # Step: create new storage
9797 self.lu.LogStep(3, steps_total, "Allocate new storage")
9798 for idx, dev in enumerate(self.instance.disks):
9799 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9800 (self.new_node, idx))
9801 # we pass force_create=True to force LVM creation
9802 for new_lv in dev.children:
9803 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9804 _GetInstanceInfoText(self.instance), False)
9806 # Step 4: drbd minors and drbd setup changes
9807 # after this, we must manually remove the drbd minors on both the
9808 # error and the success paths
9809 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9810 minors = self.cfg.AllocateDRBDMinor([self.new_node
9811 for dev in self.instance.disks],
9812 self.instance.name)
9813 logging.debug("Allocated minors %r", minors)
9816 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9817 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9818 (self.new_node, idx))
9819 # create new devices on new_node; note that we create two IDs:
9820 # one without port, so the drbd will be activated without
9821 # networking information on the new node at this stage, and one
9822 # with network, for the latter activation in step 4
9823 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9824 if self.instance.primary_node == o_node1:
9827 assert self.instance.primary_node == o_node2, "Three-node instance?"
9830 new_alone_id = (self.instance.primary_node, self.new_node, None,
9831 p_minor, new_minor, o_secret)
9832 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9833 p_minor, new_minor, o_secret)
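# Descriptive note: a DRBD8 logical_id is the six-tuple
# (node_A, node_B, port, minor_A, minor_B, secret); new_alone_id deliberately
# carries port=None so the device is brought up without networking, while
# new_net_id keeps the original port for the later network attach.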
9835 iv_names[idx] = (dev, dev.children, new_net_id)
9836 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9838 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9839 logical_id=new_alone_id,
9840 children=dev.children,
9843 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9844 _GetInstanceInfoText(self.instance), False)
9845 except errors.GenericError:
9846 self.cfg.ReleaseDRBDMinors(self.instance.name)
9849 # We have new devices, shutdown the drbd on the old secondary
9850 for idx, dev in enumerate(self.instance.disks):
9851 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9852 self.cfg.SetDiskID(dev, self.target_node)
9853 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9855 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9856 "node: %s" % (idx, msg),
9857 hint=("Please cleanup this device manually as"
9858 " soon as possible"))
9860 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9861 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9862 self.node_secondary_ip,
9863 self.instance.disks)\
9864 [self.instance.primary_node]
9866 msg = result.fail_msg
9868 # detaches didn't succeed (unlikely)
9869 self.cfg.ReleaseDRBDMinors(self.instance.name)
9870 raise errors.OpExecError("Can't detach the disks from the network on"
9871 " old node: %s" % (msg,))
9873 # if we managed to detach at least one, we update all the disks of
9874 # the instance to point to the new secondary
9875 self.lu.LogInfo("Updating instance configuration")
9876 for dev, _, new_logical_id in iv_names.itervalues():
9877 dev.logical_id = new_logical_id
9878 self.cfg.SetDiskID(dev, self.instance.primary_node)
9880 self.cfg.Update(self.instance, feedback_fn)
9882 # and now perform the drbd attach
9883 self.lu.LogInfo("Attaching primary drbds to new secondary"
9884 " (standalone => connected)")
9885 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9887 self.node_secondary_ip,
9888 self.instance.disks,
9891 for to_node, to_result in result.items():
9892 msg = to_result.fail_msg
9894 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9896 hint=("please do a gnt-instance info to see the"
9897 " status of disks"))
9899 if self.early_release:
9900 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9902 self._RemoveOldStorage(self.target_node, iv_names)
9903 # WARNING: we release all node locks here, do not do other RPCs
9904 # than WaitForSync to the primary node
9905 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9906 names=[self.instance.primary_node,
9911 # This can fail as the old devices are degraded and _WaitForSync
9912 # does a combined result over all disks, so we don't check its return value
9913 self.lu.LogStep(cstep, steps_total, "Sync devices")
9915 _WaitForSync(self.lu, self.instance)
9917 # Check all devices manually
9918 self._CheckDevices(self.instance.primary_node, iv_names)
9920 # Step: remove old storage
9921 if not self.early_release:
9922 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9923 self._RemoveOldStorage(self.target_node, iv_names)
9926 class LURepairNodeStorage(NoHooksLU):
9927 """Repairs the volume group on a node.
9932 def CheckArguments(self):
9933 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9935 storage_type = self.op.storage_type
9937 if (constants.SO_FIX_CONSISTENCY not in
9938 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9939 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9940 " repaired" % storage_type,
9943 def ExpandNames(self):
9944 self.needed_locks = {
9945 locking.LEVEL_NODE: [self.op.node_name],
9948 def _CheckFaultyDisks(self, instance, node_name):
9949 """Ensure faulty disks abort the opcode or at least warn."""
9951 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9953 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9954 " node '%s'" % (instance.name, node_name),
9956 except errors.OpPrereqError, err:
9957 if self.op.ignore_consistency:
9958 self.proc.LogWarning(str(err.args[0]))
9962 def CheckPrereq(self):
9963 """Check prerequisites.
9966 # Check whether any instance on this node has faulty disks
9967 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9968 if not inst.admin_up:
9970 check_nodes = set(inst.all_nodes)
9971 check_nodes.discard(self.op.node_name)
9972 for inst_node_name in check_nodes:
9973 self._CheckFaultyDisks(inst, inst_node_name)
9975 def Exec(self, feedback_fn):
9976 feedback_fn("Repairing storage unit '%s' on %s ..." %
9977 (self.op.name, self.op.node_name))
9979 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9980 result = self.rpc.call_storage_execute(self.op.node_name,
9981 self.op.storage_type, st_args,
9983 constants.SO_FIX_CONSISTENCY)
9984 result.Raise("Failed to repair storage unit '%s' on %s" %
9985 (self.op.name, self.op.node_name))
9988 class LUNodeEvacuate(NoHooksLU):
9989 """Evacuates instances off a list of nodes.
9994 def CheckArguments(self):
9995 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9997 def ExpandNames(self):
9998 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10000 if self.op.remote_node is not None:
10001 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10002 assert self.op.remote_node
10004 if self.op.remote_node == self.op.node_name:
10005 raise errors.OpPrereqError("Can not use evacuated node as a new"
10006 " secondary node", errors.ECODE_INVAL)
10008 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10009 raise errors.OpPrereqError("Without the use of an iallocator only"
10010 " secondary instances can be evacuated",
10011 errors.ECODE_INVAL)
10014 self.share_locks = _ShareAll()
10015 self.needed_locks = {
10016 locking.LEVEL_INSTANCE: [],
10017 locking.LEVEL_NODEGROUP: [],
10018 locking.LEVEL_NODE: [],
10021 if self.op.remote_node is None:
10022 # Iallocator will choose any node(s) in the same group
10023 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10025 group_nodes = frozenset([self.op.remote_node])
10027 # Determine nodes to be locked
10028 self.lock_nodes = set([self.op.node_name]) | group_nodes
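# Illustrative example (node names are hypothetical): evacuating "node1" with
# an iallocator locks node1 plus every other member of its node group(s),
# whereas with an explicit remote node only {"node1", remote_node} is locked.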
10030 def _DetermineInstances(self):
10031 """Builds list of instances to operate on.
10034 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10036 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10037 # Primary instances only
10038 inst_fn = _GetNodePrimaryInstances
10039 assert self.op.remote_node is None, \
10040 "Evacuating primary instances requires iallocator"
10041 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10042 # Secondary instances only
10043 inst_fn = _GetNodeSecondaryInstances
10046 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10047 inst_fn = _GetNodeInstances
10049 return inst_fn(self.cfg, self.op.node_name)
10051 def DeclareLocks(self, level):
10052 if level == locking.LEVEL_INSTANCE:
10053 # Lock instances optimistically, needs verification once node and group
10054 # locks have been acquired
10055 self.needed_locks[locking.LEVEL_INSTANCE] = \
10056 set(i.name for i in self._DetermineInstances())
10058 elif level == locking.LEVEL_NODEGROUP:
10059 # Lock node groups optimistically, needs verification once nodes have
10061 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10062 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10064 elif level == locking.LEVEL_NODE:
10065 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
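# Descriptive note: the optimistic instance and group locks declared above are
# re-checked in CheckPrereq below, which fails the opcode if the node's groups
# or instance list changed between lock acquisitions.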
10067 def CheckPrereq(self):
10069 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10070 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10071 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10073 assert owned_nodes == self.lock_nodes
10075 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10076 if owned_groups != wanted_groups:
10077 raise errors.OpExecError("Node groups changed since locks were acquired,"
10078 " current groups are '%s', used to be '%s'" %
10079 (utils.CommaJoin(wanted_groups),
10080 utils.CommaJoin(owned_groups)))
10082 # Determine affected instances
10083 self.instances = self._DetermineInstances()
10084 self.instance_names = [i.name for i in self.instances]
10086 if set(self.instance_names) != owned_instances:
10087 raise errors.OpExecError("Instances on node '%s' changed since locks"
10088 " were acquired, current instances are '%s',"
10089 " used to be '%s'" %
10090 (self.op.node_name,
10091 utils.CommaJoin(self.instance_names),
10092 utils.CommaJoin(owned_instances)))
10094 if self.instance_names:
10095 self.LogInfo("Evacuating instances from node '%s': %s",
10097 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10099 self.LogInfo("No instances to evacuate from node '%s'",
10102 if self.op.remote_node is not None:
10103 for i in self.instances:
10104 if i.primary_node == self.op.remote_node:
10105 raise errors.OpPrereqError("Node %s is the primary node of"
10106 " instance %s, cannot use it as"
10108 (self.op.remote_node, i.name),
10109 errors.ECODE_INVAL)
10111 def Exec(self, feedback_fn):
10112 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10114 if not self.instance_names:
10115 # No instances to evacuate
10118 elif self.op.iallocator is not None:
10119 # TODO: Implement relocation to other group
10120 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10121 evac_mode=self.op.mode,
10122 instances=list(self.instance_names))
10124 ial.Run(self.op.iallocator)
10126 if not ial.success:
10127 raise errors.OpPrereqError("Can't compute node evacuation using"
10128 " iallocator '%s': %s" %
10129 (self.op.iallocator, ial.info),
10130 errors.ECODE_NORES)
10132 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10134 elif self.op.remote_node is not None:
10135 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10137 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10138 remote_node=self.op.remote_node,
10140 mode=constants.REPLACE_DISK_CHG,
10141 early_release=self.op.early_release)]
10142 for instance_name in self.instance_names
10146 raise errors.ProgrammerError("No iallocator or remote node")
10148 return ResultWithJobs(jobs)
10151 def _SetOpEarlyRelease(early_release, op):
10152 """Sets C{early_release} flag on opcodes if available.
10156 op.early_release = early_release
10157 except AttributeError:
10158 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10160 return op
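# Usage sketch (as done in _LoadNodeEvacResult below): the flag is applied to
# every opcode of a generated job, e.g.
#
#   ops = map(compat.partial(_SetOpEarlyRelease, True), ops)
#
# opcodes without an early_release slot are simply left untouched.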
10163 def _NodeEvacDest(use_nodes, group, nodes):
10164 """Returns group or nodes depending on caller's choice.
10168 return utils.CommaJoin(nodes)
10173 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10174 """Unpacks the result of change-group and node-evacuate iallocator requests.
10176 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10177 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10179 @type lu: L{LogicalUnit}
10180 @param lu: Logical unit instance
10181 @type alloc_result: tuple/list
10182 @param alloc_result: Result from iallocator
10183 @type early_release: bool
10184 @param early_release: Whether to release locks early if possible
10185 @type use_nodes: bool
10186 @param use_nodes: Whether to display node names instead of groups
10189 (moved, failed, jobs) = alloc_result
10192 lu.LogWarning("Unable to evacuate instances %s",
10193 utils.CommaJoin("%s (%s)" % (name, reason)
10194 for (name, reason) in failed))
10197 lu.LogInfo("Instances to be moved: %s",
10198 utils.CommaJoin("%s (to %s)" %
10199 (name, _NodeEvacDest(use_nodes, group, nodes))
10200 for (name, group, nodes) in moved))
10202 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10203 map(opcodes.OpCode.LoadOpCode, ops))
10204 for ops in jobs]
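# Descriptive example of the payload unpacked above (all names hypothetical):
#
#   alloc_result = (
#     [("inst1", "group2", ["node3", "node4"])],        # moved
#     [("inst2", "no space on target group")],          # failed
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]],  # jobs (serialized ops)
#   )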
10207 class LUInstanceGrowDisk(LogicalUnit):
10208 """Grow a disk of an instance.
10211 HPATH = "disk-grow"
10212 HTYPE = constants.HTYPE_INSTANCE
10215 def ExpandNames(self):
10216 self._ExpandAndLockInstance()
10217 self.needed_locks[locking.LEVEL_NODE] = []
10218 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10220 def DeclareLocks(self, level):
10221 if level == locking.LEVEL_NODE:
10222 self._LockInstancesNodes()
10224 def BuildHooksEnv(self):
10225 """Build hooks env.
10227 This runs on the master, the primary and all the secondaries.
10231 "DISK": self.op.disk,
10232 "AMOUNT": self.op.amount,
10234 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10237 def BuildHooksNodes(self):
10238 """Build hooks nodes.
10241 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10244 def CheckPrereq(self):
10245 """Check prerequisites.
10247 This checks that the instance is in the cluster.
10250 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10251 assert instance is not None, \
10252 "Cannot retrieve locked instance %s" % self.op.instance_name
10253 nodenames = list(instance.all_nodes)
10254 for node in nodenames:
10255 _CheckNodeOnline(self, node)
10257 self.instance = instance
10259 if instance.disk_template not in constants.DTS_GROWABLE:
10260 raise errors.OpPrereqError("Instance's disk layout does not support"
10261 " growing", errors.ECODE_INVAL)
10263 self.disk = instance.FindDisk(self.op.disk)
10265 if instance.disk_template not in (constants.DT_FILE,
10266 constants.DT_SHARED_FILE):
10267 # TODO: check the free disk space for file, when that feature will be
10269 _CheckNodesFreeDiskPerVG(self, nodenames,
10270 self.disk.ComputeGrowth(self.op.amount))
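# Descriptive note (assumption): ComputeGrowth() is expected to return the
# additional space needed per volume group, e.g. {"xenvg": self.op.amount},
# which _CheckNodesFreeDiskPerVG then verifies on every node of the instance.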
10272 def Exec(self, feedback_fn):
10273 """Execute disk grow.
10276 instance = self.instance
10278 disk = self.disk
10279 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10280 if not disks_ok:
10281 raise errors.OpExecError("Cannot activate block device to grow")
10283 # First run all grow ops in dry-run mode
10284 for node in instance.all_nodes:
10285 self.cfg.SetDiskID(disk, node)
10286 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10287 result.Raise("Grow request failed to node %s" % node)
10289 # We know that (as far as we can test) operations across different
10290 # nodes will succeed, time to run it for real
10291 for node in instance.all_nodes:
10292 self.cfg.SetDiskID(disk, node)
10293 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10294 result.Raise("Grow request failed to node %s" % node)
10296 # TODO: Rewrite code to work properly
10297 # DRBD goes into sync mode for a short amount of time after executing the
10298 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10299 # calling "resize" in sync mode fails. Sleeping for a short amount of
10300 # time is a work-around.
10303 disk.RecordGrow(self.op.amount)
10304 self.cfg.Update(instance, feedback_fn)
10305 if self.op.wait_for_sync:
10306 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10307 if disk_abort:
10308 self.proc.LogWarning("Disk sync-ing has not returned a good"
10309 " status; please check the instance")
10310 if not instance.admin_up:
10311 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10312 elif not instance.admin_up:
10313 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10314 " not supposed to be running because no wait for"
10315 " sync mode was requested")
10318 class LUInstanceQueryData(NoHooksLU):
10319 """Query runtime instance data.
10324 def ExpandNames(self):
10325 self.needed_locks = {}
10327 # Use locking if requested or when non-static information is wanted
10328 if not (self.op.static or self.op.use_locking):
10329 self.LogWarning("Non-static data requested, locks need to be acquired")
10330 self.op.use_locking = True
10332 if self.op.instances or not self.op.use_locking:
10333 # Expand instance names right here
10334 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10336 # Will use acquired locks
10337 self.wanted_names = None
10339 if self.op.use_locking:
10340 self.share_locks = _ShareAll()
10342 if self.wanted_names is None:
10343 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10345 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10347 self.needed_locks[locking.LEVEL_NODE] = []
10348 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10350 def DeclareLocks(self, level):
10351 if self.op.use_locking and level == locking.LEVEL_NODE:
10352 self._LockInstancesNodes()
10354 def CheckPrereq(self):
10355 """Check prerequisites.
10357 This only checks the optional instance list against the existing names.
10360 if self.wanted_names is None:
10361 assert self.op.use_locking, "Locking was not used"
10362 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10364 self.wanted_instances = \
10365 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10367 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10368 """Returns the status of a block device
10371 if self.op.static or not node:
10374 self.cfg.SetDiskID(dev, node)
10376 result = self.rpc.call_blockdev_find(node, dev)
10380 result.Raise("Can't compute disk status for %s" % instance_name)
10382 status = result.payload
10386 return (status.dev_path, status.major, status.minor,
10387 status.sync_percent, status.estimated_time,
10388 status.is_degraded, status.ldisk_status)
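# Descriptive note: the tuple above is positional and is what ends up in the
# "pstatus"/"sstatus" fields built by _ComputeDiskStatus below:
# (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
#  ldisk_status).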
10390 def _ComputeDiskStatus(self, instance, snode, dev):
10391 """Compute block device status.
10394 if dev.dev_type in constants.LDS_DRBD:
10395 # we change the snode then (otherwise we use the one passed in)
10396 if dev.logical_id[0] == instance.primary_node:
10397 snode = dev.logical_id[1]
10399 snode = dev.logical_id[0]
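# Descriptive note: for DRBD the first two logical_id entries are the two node
# names (see the six-tuple unpacked in _ExecDrbd8Secondary), so whichever of
# them is not the primary must be the secondary whose status is queried below.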
10401 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10402 instance.name, dev)
10403 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10406 dev_children = map(compat.partial(self._ComputeDiskStatus,
10413 "iv_name": dev.iv_name,
10414 "dev_type": dev.dev_type,
10415 "logical_id": dev.logical_id,
10416 "physical_id": dev.physical_id,
10417 "pstatus": dev_pstatus,
10418 "sstatus": dev_sstatus,
10419 "children": dev_children,
10424 def Exec(self, feedback_fn):
10425 """Gather and return data"""
10428 cluster = self.cfg.GetClusterInfo()
10430 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10431 for i in self.wanted_instances)
10432 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10433 if self.op.static or pnode.offline:
10434 remote_state = None
10435 if pnode.offline:
10436 self.LogWarning("Primary node %s is marked offline, returning static"
10437 " information only for instance %s" %
10438 (pnode.name, instance.name))
10439 else:
10440 remote_info = self.rpc.call_instance_info(instance.primary_node,
10442 instance.hypervisor)
10443 remote_info.Raise("Error checking node %s" % instance.primary_node)
10444 remote_info = remote_info.payload
10445 if remote_info and "state" in remote_info:
10446 remote_state = "up"
10448 remote_state = "down"
10450 if instance.admin_up:
10451 config_state = "up"
10453 config_state = "down"
10455 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10458 result[instance.name] = {
10459 "name": instance.name,
10460 "config_state": config_state,
10461 "run_state": remote_state,
10462 "pnode": instance.primary_node,
10463 "snodes": instance.secondary_nodes,
10465 # this happens to be the same format used for hooks
10466 "nics": _NICListToTuple(self, instance.nics),
10467 "disk_template": instance.disk_template,
10469 "hypervisor": instance.hypervisor,
10470 "network_port": instance.network_port,
10471 "hv_instance": instance.hvparams,
10472 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10473 "be_instance": instance.beparams,
10474 "be_actual": cluster.FillBE(instance),
10475 "os_instance": instance.osparams,
10476 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10477 "serial_no": instance.serial_no,
10478 "mtime": instance.mtime,
10479 "ctime": instance.ctime,
10480 "uuid": instance.uuid,
10486 class LUInstanceSetParams(LogicalUnit):
10487 """Modifies an instances's parameters.
10490 HPATH = "instance-modify"
10491 HTYPE = constants.HTYPE_INSTANCE
10494 def CheckArguments(self):
10495 if not (self.op.nics or self.op.disks or self.op.disk_template or
10496 self.op.hvparams or self.op.beparams or self.op.os_name):
10497 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10499 if self.op.hvparams:
10500 _CheckGlobalHvParams(self.op.hvparams)
10504 for disk_op, disk_dict in self.op.disks:
10505 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10506 if disk_op == constants.DDM_REMOVE:
10507 disk_addremove += 1
10509 elif disk_op == constants.DDM_ADD:
10510 disk_addremove += 1
10512 if not isinstance(disk_op, int):
10513 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10514 if not isinstance(disk_dict, dict):
10515 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10516 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10518 if disk_op == constants.DDM_ADD:
10519 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10520 if mode not in constants.DISK_ACCESS_SET:
10521 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10522 errors.ECODE_INVAL)
10523 size = disk_dict.get(constants.IDISK_SIZE, None)
10524 if size is None:
10525 raise errors.OpPrereqError("Required disk parameter size missing",
10526 errors.ECODE_INVAL)
10527 try:
10528 size = int(size)
10529 except (TypeError, ValueError), err:
10530 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10531 str(err), errors.ECODE_INVAL)
10532 disk_dict[constants.IDISK_SIZE] = size
10533 else:
10534 # modification of disk
10535 if constants.IDISK_SIZE in disk_dict:
10536 raise errors.OpPrereqError("Disk size change not possible, use"
10537 " grow-disk", errors.ECODE_INVAL)
10539 if disk_addremove > 1:
10540 raise errors.OpPrereqError("Only one disk add or remove operation"
10541 " supported at a time", errors.ECODE_INVAL)
10543 if self.op.disks and self.op.disk_template is not None:
10544 raise errors.OpPrereqError("Disk template conversion and other disk"
10545 " changes not supported at the same time",
10546 errors.ECODE_INVAL)
10548 if (self.op.disk_template and
10549 self.op.disk_template in constants.DTS_INT_MIRROR and
10550 self.op.remote_node is None):
10551 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10552 " one requires specifying a secondary node",
10553 errors.ECODE_INVAL)
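# Illustrative example of the disk modifications validated above (sizes and
# indices are hypothetical); each entry is (DDM_ADD | DDM_REMOVE | <index>,
# parameter dict):
#
#   op.disks = [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
#                                    constants.IDISK_MODE: constants.DISK_RDWR})]
#   op.disks = [(2, {constants.IDISK_MODE: constants.DISK_RDWR})]  # modify disk 2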
10557 for nic_op, nic_dict in self.op.nics:
10558 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10559 if nic_op == constants.DDM_REMOVE:
10562 elif nic_op == constants.DDM_ADD:
10565 if not isinstance(nic_op, int):
10566 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10567 if not isinstance(nic_dict, dict):
10568 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10569 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10571 # nic_dict should be a dict
10572 nic_ip = nic_dict.get(constants.INIC_IP, None)
10573 if nic_ip is not None:
10574 if nic_ip.lower() == constants.VALUE_NONE:
10575 nic_dict[constants.INIC_IP] = None
10577 if not netutils.IPAddress.IsValid(nic_ip):
10578 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10579 errors.ECODE_INVAL)
10581 nic_bridge = nic_dict.get("bridge", None)
10582 nic_link = nic_dict.get(constants.INIC_LINK, None)
10583 if nic_bridge and nic_link:
10584 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10585 " at the same time", errors.ECODE_INVAL)
10586 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10587 nic_dict["bridge"] = None
10588 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10589 nic_dict[constants.INIC_LINK] = None
10591 if nic_op == constants.DDM_ADD:
10592 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10593 if nic_mac is None:
10594 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10596 if constants.INIC_MAC in nic_dict:
10597 nic_mac = nic_dict[constants.INIC_MAC]
10598 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10599 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10601 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10602 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10603 " modifying an existing nic",
10604 errors.ECODE_INVAL)
10606 if nic_addremove > 1:
10607 raise errors.OpPrereqError("Only one NIC add or remove operation"
10608 " supported at a time", errors.ECODE_INVAL)
10610 def ExpandNames(self):
10611 self._ExpandAndLockInstance()
10612 self.needed_locks[locking.LEVEL_NODE] = []
10613 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10615 def DeclareLocks(self, level):
10616 if level == locking.LEVEL_NODE:
10617 self._LockInstancesNodes()
10618 if self.op.disk_template and self.op.remote_node:
10619 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10620 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10622 def BuildHooksEnv(self):
10623 """Build hooks env.
10625 This runs on the master, primary and secondaries.
10629 if constants.BE_MEMORY in self.be_new:
10630 args["memory"] = self.be_new[constants.BE_MEMORY]
10631 if constants.BE_VCPUS in self.be_new:
10632 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10633 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10634 # information at all.
10637 nic_override = dict(self.op.nics)
10638 for idx, nic in enumerate(self.instance.nics):
10639 if idx in nic_override:
10640 this_nic_override = nic_override[idx]
10642 this_nic_override = {}
10643 if constants.INIC_IP in this_nic_override:
10644 ip = this_nic_override[constants.INIC_IP]
10647 if constants.INIC_MAC in this_nic_override:
10648 mac = this_nic_override[constants.INIC_MAC]
10651 if idx in self.nic_pnew:
10652 nicparams = self.nic_pnew[idx]
10654 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10655 mode = nicparams[constants.NIC_MODE]
10656 link = nicparams[constants.NIC_LINK]
10657 args["nics"].append((ip, mac, mode, link))
10658 if constants.DDM_ADD in nic_override:
10659 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10660 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10661 nicparams = self.nic_pnew[constants.DDM_ADD]
10662 mode = nicparams[constants.NIC_MODE]
10663 link = nicparams[constants.NIC_LINK]
10664 args["nics"].append((ip, mac, mode, link))
10665 elif constants.DDM_REMOVE in nic_override:
10666 del args["nics"][-1]
10668 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10669 if self.op.disk_template:
10670 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10674 def BuildHooksNodes(self):
10675 """Build hooks nodes.
10678 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10681 def CheckPrereq(self):
10682 """Check prerequisites.
10684 This only checks the instance list against the existing names.
10687 # checking the new params on the primary/secondary nodes
10689 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10690 cluster = self.cluster = self.cfg.GetClusterInfo()
10691 assert self.instance is not None, \
10692 "Cannot retrieve locked instance %s" % self.op.instance_name
10693 pnode = instance.primary_node
10694 nodelist = list(instance.all_nodes)
10697 if self.op.os_name and not self.op.force:
10698 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10699 self.op.force_variant)
10700 instance_os = self.op.os_name
10702 instance_os = instance.os
10704 if self.op.disk_template:
10705 if instance.disk_template == self.op.disk_template:
10706 raise errors.OpPrereqError("Instance already has disk template %s" %
10707 instance.disk_template, errors.ECODE_INVAL)
10709 if (instance.disk_template,
10710 self.op.disk_template) not in self._DISK_CONVERSIONS:
10711 raise errors.OpPrereqError("Unsupported disk template conversion from"
10712 " %s to %s" % (instance.disk_template,
10713 self.op.disk_template),
10714 errors.ECODE_INVAL)
10715 _CheckInstanceDown(self, instance, "cannot change disk template")
10716 if self.op.disk_template in constants.DTS_INT_MIRROR:
10717 if self.op.remote_node == pnode:
10718 raise errors.OpPrereqError("Given new secondary node %s is the same"
10719 " as the primary node of the instance" %
10720 self.op.remote_node, errors.ECODE_STATE)
10721 _CheckNodeOnline(self, self.op.remote_node)
10722 _CheckNodeNotDrained(self, self.op.remote_node)
10723 # FIXME: here we assume that the old instance type is DT_PLAIN
10724 assert instance.disk_template == constants.DT_PLAIN
10725 disks = [{constants.IDISK_SIZE: d.size,
10726 constants.IDISK_VG: d.logical_id[0]}
10727 for d in instance.disks]
10728 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10729 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10731 # hvparams processing
10732 if self.op.hvparams:
10733 hv_type = instance.hypervisor
10734 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10735 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10736 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10739 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10740 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10741 self.hv_new = hv_new # the new actual values
10742 self.hv_inst = i_hvdict # the new dict (without defaults)
10744 self.hv_new = self.hv_inst = {}
10746 # beparams processing
10747 if self.op.beparams:
10748 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10750 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10751 be_new = cluster.SimpleFillBE(i_bedict)
10752 self.be_new = be_new # the new actual values
10753 self.be_inst = i_bedict # the new dict (without defaults)
10755 self.be_new = self.be_inst = {}
10756 be_old = cluster.FillBE(instance)
10758 # osparams processing
10759 if self.op.osparams:
10760 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10761 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10762 self.os_inst = i_osdict # the new dict (without defaults)
10768 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10769 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10770 mem_check_list = [pnode]
10771 if be_new[constants.BE_AUTO_BALANCE]:
10772 # either we changed auto_balance to yes or it was from before
10773 mem_check_list.extend(instance.secondary_nodes)
10774 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10775 instance.hypervisor)
10776 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10777 instance.hypervisor)
10778 pninfo = nodeinfo[pnode]
10779 msg = pninfo.fail_msg
10781 # Assume the primary node is unreachable and go ahead
10782 self.warn.append("Can't get info from primary node %s: %s" %
10784 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10785 self.warn.append("Node data from primary node %s doesn't contain"
10786 " free memory information" % pnode)
10787 elif instance_info.fail_msg:
10788 self.warn.append("Can't get instance runtime information: %s" %
10789 instance_info.fail_msg)
10791 if instance_info.payload:
10792 current_mem = int(instance_info.payload["memory"])
10794 # Assume instance not running
10795 # (there is a slight race condition here, but it's not very probable,
10796 # and we have no other way to check)
10798 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10799 pninfo.payload["memory_free"])
10801 raise errors.OpPrereqError("This change will prevent the instance"
10802 " from starting, due to %d MB of memory"
10803 " missing on its primary node" % miss_mem,
10804 errors.ECODE_NORES)
10806 if be_new[constants.BE_AUTO_BALANCE]:
10807 for node, nres in nodeinfo.items():
10808 if node not in instance.secondary_nodes:
10810 nres.Raise("Can't get info from secondary node %s" % node,
10811 prereq=True, ecode=errors.ECODE_STATE)
10812 if not isinstance(nres.payload.get("memory_free", None), int):
10813 raise errors.OpPrereqError("Secondary node %s didn't return free"
10814 " memory information" % node,
10815 errors.ECODE_STATE)
10816 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10817 raise errors.OpPrereqError("This change will prevent the instance"
10818 " from failover to its secondary node"
10819 " %s, due to not enough memory" % node,
10820 errors.ECODE_STATE)
10824 self.nic_pinst = {}
10825 for nic_op, nic_dict in self.op.nics:
10826 if nic_op == constants.DDM_REMOVE:
10827 if not instance.nics:
10828 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10829 errors.ECODE_INVAL)
10831 if nic_op != constants.DDM_ADD:
10833 if not instance.nics:
10834 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10835 " no NICs" % nic_op,
10836 errors.ECODE_INVAL)
10837 if nic_op < 0 or nic_op >= len(instance.nics):
10838 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10840 (nic_op, len(instance.nics) - 1),
10841 errors.ECODE_INVAL)
10842 old_nic_params = instance.nics[nic_op].nicparams
10843 old_nic_ip = instance.nics[nic_op].ip
10845 old_nic_params = {}
10848 update_params_dict = dict([(key, nic_dict[key])
10849 for key in constants.NICS_PARAMETERS
10850 if key in nic_dict])
10852 if "bridge" in nic_dict:
10853 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10855 new_nic_params = _GetUpdatedParams(old_nic_params,
10856 update_params_dict)
10857 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10858 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10859 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10860 self.nic_pinst[nic_op] = new_nic_params
10861 self.nic_pnew[nic_op] = new_filled_nic_params
10862 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10864 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10865 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10866 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10868 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10870 self.warn.append(msg)
10872 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10873 if new_nic_mode == constants.NIC_MODE_ROUTED:
10874 if constants.INIC_IP in nic_dict:
10875 nic_ip = nic_dict[constants.INIC_IP]
10877 nic_ip = old_nic_ip
10879 raise errors.OpPrereqError("Cannot set the nic ip to None"
10880 " on a routed nic", errors.ECODE_INVAL)
10881 if constants.INIC_MAC in nic_dict:
10882 nic_mac = nic_dict[constants.INIC_MAC]
10883 if nic_mac is None:
10884 raise errors.OpPrereqError("Cannot set the nic mac to None",
10885 errors.ECODE_INVAL)
10886 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10887 # otherwise generate the mac
10888 nic_dict[constants.INIC_MAC] = \
10889 self.cfg.GenerateMAC(self.proc.GetECId())
10891 # or validate/reserve the current one
10893 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10894 except errors.ReservationError:
10895 raise errors.OpPrereqError("MAC address %s already in use"
10896 " in cluster" % nic_mac,
10897 errors.ECODE_NOTUNIQUE)
10900 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10901 raise errors.OpPrereqError("Disk operations not supported for"
10902 " diskless instances",
10903 errors.ECODE_INVAL)
10904 for disk_op, _ in self.op.disks:
10905 if disk_op == constants.DDM_REMOVE:
10906 if len(instance.disks) == 1:
10907 raise errors.OpPrereqError("Cannot remove the last disk of"
10908 " an instance", errors.ECODE_INVAL)
10909 _CheckInstanceDown(self, instance, "cannot remove disks")
10911 if (disk_op == constants.DDM_ADD and
10912 len(instance.disks) >= constants.MAX_DISKS):
10913 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10914 " add more" % constants.MAX_DISKS,
10915 errors.ECODE_STATE)
10916 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10918 if disk_op < 0 or disk_op >= len(instance.disks):
10919 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10921 (disk_op, len(instance.disks)),
10922 errors.ECODE_INVAL)
10926 def _ConvertPlainToDrbd(self, feedback_fn):
10927 """Converts an instance from plain to drbd.
10930 feedback_fn("Converting template to drbd")
10931 instance = self.instance
10932 pnode = instance.primary_node
10933 snode = self.op.remote_node
10935 # create a fake disk info for _GenerateDiskTemplate
10936 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10937 constants.IDISK_VG: d.logical_id[0]}
10938 for d in instance.disks]
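# Descriptive example (values are hypothetical): for a single 10 GiB plain disk
# in volume group "xenvg" this yields
#   disk_info = [{constants.IDISK_SIZE: 10240,
#                 constants.IDISK_MODE: constants.DISK_RDWR,
#                 constants.IDISK_VG: "xenvg"}]
# which is all _GenerateDiskTemplate needs to lay out the matching DRBD disks.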
10939 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10940 instance.name, pnode, [snode],
10941 disk_info, None, None, 0, feedback_fn)
10942 info = _GetInstanceInfoText(instance)
10943 feedback_fn("Creating aditional volumes...")
10944 # first, create the missing data and meta devices
10945 for disk in new_disks:
10946 # unfortunately this is... not too nice
10947 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10949 for child in disk.children:
10950 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10951 # at this stage, all new LVs have been created, we can rename the
10953 feedback_fn("Renaming original volumes...")
10954 rename_list = [(o, n.children[0].logical_id)
10955 for (o, n) in zip(instance.disks, new_disks)]
10956 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10957 result.Raise("Failed to rename original LVs")
10959 feedback_fn("Initializing DRBD devices...")
10960 # all child devices are in place, we can now create the DRBD devices
10961 for disk in new_disks:
10962 for node in [pnode, snode]:
10963 f_create = node == pnode
10964 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10966 # at this point, the instance has been modified
10967 instance.disk_template = constants.DT_DRBD8
10968 instance.disks = new_disks
10969 self.cfg.Update(instance, feedback_fn)
10971 # disks are created, waiting for sync
10972 disk_abort = not _WaitForSync(self, instance,
10973 oneshot=not self.op.wait_for_sync)
10974 if disk_abort:
10975 raise errors.OpExecError("There are some degraded disks for"
10976 " this instance, please cleanup manually")
10978 def _ConvertDrbdToPlain(self, feedback_fn):
10979 """Converts an instance from drbd to plain.
10982 instance = self.instance
10983 assert len(instance.secondary_nodes) == 1
10984 pnode = instance.primary_node
10985 snode = instance.secondary_nodes[0]
10986 feedback_fn("Converting template to plain")
10988 old_disks = instance.disks
10989 new_disks = [d.children[0] for d in old_disks]
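# Descriptive note: a DRBD8 disk has two LV children, data first and metadata
# second (see _CreateNewStorage above), so keeping children[0] turns the data
# LV into the new plain disk while the metadata LVs are removed further down.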
10991 # copy over size and mode
10992 for parent, child in zip(old_disks, new_disks):
10993 child.size = parent.size
10994 child.mode = parent.mode
10996 # update instance structure
10997 instance.disks = new_disks
10998 instance.disk_template = constants.DT_PLAIN
10999 self.cfg.Update(instance, feedback_fn)
11001 feedback_fn("Removing volumes on the secondary node...")
11002 for disk in old_disks:
11003 self.cfg.SetDiskID(disk, snode)
11004 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11006 self.LogWarning("Could not remove block device %s on node %s,"
11007 " continuing anyway: %s", disk.iv_name, snode, msg)
11009 feedback_fn("Removing unneeded volumes on the primary node...")
11010 for idx, disk in enumerate(old_disks):
11011 meta = disk.children[1]
11012 self.cfg.SetDiskID(meta, pnode)
11013 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11015 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11016 " continuing anyway: %s", idx, pnode, msg)
11018 def Exec(self, feedback_fn):
11019 """Modifies an instance.
11021 All parameters take effect only at the next restart of the instance.
11024 # Process here the warnings from CheckPrereq, as we don't have a
11025 # feedback_fn there.
11026 for warn in self.warn:
11027 feedback_fn("WARNING: %s" % warn)
11030 instance = self.instance
11032 for disk_op, disk_dict in self.op.disks:
11033 if disk_op == constants.DDM_REMOVE:
11034 # remove the last disk
11035 device = instance.disks.pop()
11036 device_idx = len(instance.disks)
11037 for node, disk in device.ComputeNodeTree(instance.primary_node):
11038 self.cfg.SetDiskID(disk, node)
11039 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11041 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11042 " continuing anyway", device_idx, node, msg)
11043 result.append(("disk/%d" % device_idx, "remove"))
11044 elif disk_op == constants.DDM_ADD:
11046 if instance.disk_template in (constants.DT_FILE,
11047 constants.DT_SHARED_FILE):
11048 file_driver, file_path = instance.disks[0].logical_id
11049 file_path = os.path.dirname(file_path)
11051 file_driver = file_path = None
11052 disk_idx_base = len(instance.disks)
11053 new_disk = _GenerateDiskTemplate(self,
11054 instance.disk_template,
11055 instance.name, instance.primary_node,
11056 instance.secondary_nodes,
11060 disk_idx_base, feedback_fn)[0]
11061 instance.disks.append(new_disk)
11062 info = _GetInstanceInfoText(instance)
11064 logging.info("Creating volume %s for instance %s",
11065 new_disk.iv_name, instance.name)
11066 # Note: this needs to be kept in sync with _CreateDisks
11068 for node in instance.all_nodes:
11069 f_create = node == instance.primary_node
11071 _CreateBlockDev(self, node, instance, new_disk,
11072 f_create, info, f_create)
11073 except errors.OpExecError, err:
11074 self.LogWarning("Failed to create volume %s (%s) on"
11076 new_disk.iv_name, new_disk, node, err)
11077 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11078 (new_disk.size, new_disk.mode)))
11080 # change a given disk
11081 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11082 result.append(("disk.mode/%d" % disk_op,
11083 disk_dict[constants.IDISK_MODE]))
11085 if self.op.disk_template:
11086 r_shut = _ShutdownInstanceDisks(self, instance)
11088 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11089 " proceed with disk template conversion")
11090 mode = (instance.disk_template, self.op.disk_template)
11092 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11094 self.cfg.ReleaseDRBDMinors(instance.name)
11096 result.append(("disk_template", self.op.disk_template))
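# NIC changes below use the same (operation, parameters) format, e.g.
#   [(constants.DDM_ADD, {constants.INIC_MAC: "aa:00:00:35:a7:01",
#                         constants.INIC_IP: None})]
# (the MAC shown is purely an example value)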
11099 for nic_op, nic_dict in self.op.nics:
11100 if nic_op == constants.DDM_REMOVE:
11101 # remove the last nic
11102 del instance.nics[-1]
11103 result.append(("nic.%d" % len(instance.nics), "remove"))
11104 elif nic_op == constants.DDM_ADD:
11105 # mac and bridge should be set by now
11106 mac = nic_dict[constants.INIC_MAC]
11107 ip = nic_dict.get(constants.INIC_IP, None)
11108 nicparams = self.nic_pinst[constants.DDM_ADD]
11109 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11110 instance.nics.append(new_nic)
11111 result.append(("nic.%d" % (len(instance.nics) - 1),
11112 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11113 (new_nic.mac, new_nic.ip,
11114 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11115 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11118 for key in (constants.INIC_MAC, constants.INIC_IP):
11119 if key in nic_dict:
11120 setattr(instance.nics[nic_op], key, nic_dict[key])
11121 if nic_op in self.nic_pinst:
11122 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11123 for key, val in nic_dict.iteritems():
11124 result.append(("nic.%s/%d" % (key, nic_op), val))
11127 if self.op.hvparams:
11128 instance.hvparams = self.hv_inst
11129 for key, val in self.op.hvparams.iteritems():
11130 result.append(("hv/%s" % key, val))
11133 if self.op.beparams:
11134 instance.beparams = self.be_inst
11135 for key, val in self.op.beparams.iteritems():
11136 result.append(("be/%s" % key, val))
11139 if self.op.os_name:
11140 instance.os = self.op.os_name
11143 if self.op.osparams:
11144 instance.osparams = self.os_inst
11145 for key, val in self.op.osparams.iteritems():
11146 result.append(("os/%s" % key, val))
11148 self.cfg.Update(instance, feedback_fn)
11152 _DISK_CONVERSIONS = {
11153 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11154 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11158 class LUInstanceChangeGroup(LogicalUnit):
11159 HPATH = "instance-change-group"
11160 HTYPE = constants.HTYPE_INSTANCE
11163 def ExpandNames(self):
11164 self.share_locks = _ShareAll()
11165 self.needed_locks = {
11166 locking.LEVEL_NODEGROUP: [],
11167 locking.LEVEL_NODE: [],
11170 self._ExpandAndLockInstance()
11172 if self.op.target_groups:
11173 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11174 self.op.target_groups)
11176 self.req_target_uuids = None
11178 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11180 def DeclareLocks(self, level):
11181 if level == locking.LEVEL_NODEGROUP:
11182 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11184 if self.req_target_uuids:
11185 lock_groups = set(self.req_target_uuids)
11187 # Lock all groups used by instance optimistically; this requires going
11188 # via the node before it's locked, requiring verification later on
11189 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11190 lock_groups.update(instance_groups)
11192 # No target groups, need to lock all of them
11193 lock_groups = locking.ALL_SET
11195 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11197 elif level == locking.LEVEL_NODE:
11198 if self.req_target_uuids:
11199 # Lock all nodes used by instances
11200 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11201 self._LockInstancesNodes()
11203 # Lock all nodes in all potential target groups
11204 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11205 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11206 member_nodes = [node_name
11207 for group in lock_groups
11208 for node_name in self.cfg.GetNodeGroup(group).members]
11209 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11211 # Lock all nodes as all groups are potential targets
11212 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11214 def CheckPrereq(self):
11215 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11216 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11217 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11219 assert (self.req_target_uuids is None or
11220 owned_groups.issuperset(self.req_target_uuids))
11221 assert owned_instances == set([self.op.instance_name])
11223 # Get instance information
11224 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11226 # Check if node groups for locked instance are still correct
11227 assert owned_nodes.issuperset(self.instance.all_nodes), \
11228 ("Instance %s's nodes changed while we kept the lock" %
11229 self.op.instance_name)
11231 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11234 if self.req_target_uuids:
11235 # User requested specific target groups
11236 self.target_uuids = self.req_target_uuids
11238 # All groups except those used by the instance are potential targets
11239 self.target_uuids = owned_groups - inst_groups
11241 conflicting_groups = self.target_uuids & inst_groups
11242 if conflicting_groups:
11243 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11244 " used by the instance '%s'" %
11245 (utils.CommaJoin(conflicting_groups),
11246 self.op.instance_name),
11247 errors.ECODE_INVAL)
11249 if not self.target_uuids:
11250 raise errors.OpPrereqError("There are no possible target groups",
11251 errors.ECODE_INVAL)
11253 def BuildHooksEnv(self):
11254 """Build hooks env.
11257 assert self.target_uuids
11260 "TARGET_GROUPS": " ".join(self.target_uuids),
11263 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11267 def BuildHooksNodes(self):
11268 """Build hooks nodes.
11271 mn = self.cfg.GetMasterNode()
11272 return ([mn], [mn])
11274 def Exec(self, feedback_fn):
11275 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11277 assert instances == [self.op.instance_name], "Instance not locked"
11279 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11280 instances=instances, target_groups=list(self.target_uuids))
11282 ial.Run(self.op.iallocator)
11284 if not ial.success:
11285 raise errors.OpPrereqError("Can't compute solution for changing group of"
11286 " instance '%s' using iallocator '%s': %s" %
11287 (self.op.instance_name, self.op.iallocator,
11289 errors.ECODE_NORES)
11291 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
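# jobs is a list of job definitions, each itself a list of opcodes (e.g. a
# failover/migrate or replace-disks opcode per affected instance); wrapping
# them in ResultWithJobs lets the master processor submit them as new jobs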
11293 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11294 " instance '%s'", len(jobs), self.op.instance_name)
11296 return ResultWithJobs(jobs)
11299 class LUBackupQuery(NoHooksLU):
11300 """Query the exports list
11305 def ExpandNames(self):
11306 self.needed_locks = {}
11307 self.share_locks[locking.LEVEL_NODE] = 1
11308 if not self.op.nodes:
11309 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11311 self.needed_locks[locking.LEVEL_NODE] = \
11312 _GetWantedNodes(self, self.op.nodes)
11314 def Exec(self, feedback_fn):
11315 """Compute the list of all the exported system images.
11318 @return: a dictionary with the structure node->(export-list)
11319 where export-list is a list of the instances exported on
11320 that node.
11323 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11324 rpcresult = self.rpc.call_export_list(self.nodes)
11326 for node in rpcresult:
11327 if rpcresult[node].fail_msg:
11328 result[node] = False
11330 result[node] = rpcresult[node].payload
11335 class LUBackupPrepare(NoHooksLU):
11336 """Prepares an instance for an export and returns useful information.
11341 def ExpandNames(self):
11342 self._ExpandAndLockInstance()
11344 def CheckPrereq(self):
11345 """Check prerequisites.
11348 instance_name = self.op.instance_name
11350 self.instance = self.cfg.GetInstanceInfo(instance_name)
11351 assert self.instance is not None, \
11352 "Cannot retrieve locked instance %s" % self.op.instance_name
11353 _CheckNodeOnline(self, self.instance.primary_node)
11355 self._cds = _GetClusterDomainSecret()
11357 def Exec(self, feedback_fn):
11358 """Prepares an instance for an export.
11361 instance = self.instance
11363 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11364 salt = utils.GenerateSecret(8)
11366 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11367 result = self.rpc.call_x509_cert_create(instance.primary_node,
11368 constants.RIE_CERT_VALIDITY)
11369 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11371 (name, cert_pem) = result.payload
11373 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11377 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11378 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11380 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11386 class LUBackupExport(LogicalUnit):
11387 """Export an instance to an image in the cluster.
11390 HPATH = "instance-export"
11391 HTYPE = constants.HTYPE_INSTANCE
11394 def CheckArguments(self):
11395 """Check the arguments.
11398 self.x509_key_name = self.op.x509_key_name
11399 self.dest_x509_ca_pem = self.op.destination_x509_ca
11401 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11402 if not self.x509_key_name:
11403 raise errors.OpPrereqError("Missing X509 key name for encryption",
11404 errors.ECODE_INVAL)
11406 if not self.dest_x509_ca_pem:
11407 raise errors.OpPrereqError("Missing destination X509 CA",
11408 errors.ECODE_INVAL)
11410 def ExpandNames(self):
11411 self._ExpandAndLockInstance()
11413 # Lock all nodes for local exports
11414 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11415 # FIXME: lock only instance primary and destination node
11417 # Sad but true, for now we have to lock all nodes, as we don't know where
11418 # the previous export might be, and in this LU we search for it and
11419 # remove it from its current node. In the future we could fix this by:
11420 # - making a tasklet to search (share-lock all), then create the
11421 # new one, then one to remove, after
11422 # - removing the removal operation altogether
11423 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11425 def DeclareLocks(self, level):
11426 """Last minute lock declaration."""
11427 # All nodes are locked anyway, so nothing to do here.
11429 def BuildHooksEnv(self):
11430 """Build hooks env.
11432 This will run on the master, primary node and target node.
11436 "EXPORT_MODE": self.op.mode,
11437 "EXPORT_NODE": self.op.target_node,
11438 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11439 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11440 # TODO: Generic function for boolean env variables
11441 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11444 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
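# (hook scripts receive these variables with the GANETI_ prefix, e.g.
# GANETI_EXPORT_NODE)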
11448 def BuildHooksNodes(self):
11449 """Build hooks nodes.
11452 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11454 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11455 nl.append(self.op.target_node)
11459 def CheckPrereq(self):
11460 """Check prerequisites.
11462 This checks that the instance and node names are valid.
11465 instance_name = self.op.instance_name
11467 self.instance = self.cfg.GetInstanceInfo(instance_name)
11468 assert self.instance is not None, \
11469 "Cannot retrieve locked instance %s" % self.op.instance_name
11470 _CheckNodeOnline(self, self.instance.primary_node)
11472 if (self.op.remove_instance and self.instance.admin_up and
11473 not self.op.shutdown):
11474 raise errors.OpPrereqError("Can not remove instance without shutting it"
11477 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11478 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11479 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11480 assert self.dst_node is not None
11482 _CheckNodeOnline(self, self.dst_node.name)
11483 _CheckNodeNotDrained(self, self.dst_node.name)
11486 self.dest_disk_info = None
11487 self.dest_x509_ca = None
11489 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11490 self.dst_node = None
11492 if len(self.op.target_node) != len(self.instance.disks):
11493 raise errors.OpPrereqError(("Received destination information for %s"
11494 " disks, but instance %s has %s disks") %
11495 (len(self.op.target_node), instance_name,
11496 len(self.instance.disks)),
11497 errors.ECODE_INVAL)
11499 cds = _GetClusterDomainSecret()
11501 # Check X509 key name
11503 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11504 except (TypeError, ValueError), err:
11505 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11507 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11508 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11509 errors.ECODE_INVAL)
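# the key name tuple comes from LUBackupPrepare and looks roughly like
#   ("<key-uuid>", utils.Sha1Hmac(cds, "<key-uuid>", salt=salt), salt)
# so only a party knowing the cluster domain secret can have generated it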
11511 # Load and verify CA
11513 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11514 except OpenSSL.crypto.Error, err:
11515 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11516 (err, ), errors.ECODE_INVAL)
11518 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11519 if errcode is not None:
11520 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11521 (msg, ), errors.ECODE_INVAL)
11523 self.dest_x509_ca = cert
11525 # Verify target information
11527 for idx, disk_data in enumerate(self.op.target_node):
11529 (host, port, magic) = \
11530 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11531 except errors.GenericError, err:
11532 raise errors.OpPrereqError("Target info for disk %s: %s" %
11533 (idx, err), errors.ECODE_INVAL)
11535 disk_info.append((host, port, magic))
11537 assert len(disk_info) == len(self.op.target_node)
11538 self.dest_disk_info = disk_info
11541 raise errors.ProgrammerError("Unhandled export mode %r" %
11544 # instance disk type verification
11545 # TODO: Implement export support for file-based disks
11546 for disk in self.instance.disks:
11547 if disk.dev_type == constants.LD_FILE:
11548 raise errors.OpPrereqError("Export not supported for instances with"
11549 " file-based disks", errors.ECODE_INVAL)
11551 def _CleanupExports(self, feedback_fn):
11552 """Removes exports of current instance from all other nodes.
11554 If an instance in a cluster with nodes A..D was exported to node C, its
11555 exports will be removed from the nodes A, B and D.
11558 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11560 nodelist = self.cfg.GetNodeList()
11561 nodelist.remove(self.dst_node.name)
11563 # on one-node clusters nodelist will be empty after the removal; if we
11564 # proceeded, the backup would be removed because OpBackupQuery
11565 # substitutes an empty list with the full cluster node list.
11566 iname = self.instance.name
11568 feedback_fn("Removing old exports for instance %s" % iname)
11569 exportlist = self.rpc.call_export_list(nodelist)
11570 for node in exportlist:
11571 if exportlist[node].fail_msg:
11573 if iname in exportlist[node].payload:
11574 msg = self.rpc.call_export_remove(node, iname).fail_msg
11576 self.LogWarning("Could not remove older export for instance %s"
11577 " on node %s: %s", iname, node, msg)
11579 def Exec(self, feedback_fn):
11580 """Export an instance to an image in the cluster.
11583 assert self.op.mode in constants.EXPORT_MODES
11585 instance = self.instance
11586 src_node = instance.primary_node
11588 if self.op.shutdown:
11589 # shutdown the instance, but not the disks
11590 feedback_fn("Shutting down instance %s" % instance.name)
11591 result = self.rpc.call_instance_shutdown(src_node, instance,
11592 self.op.shutdown_timeout)
11593 # TODO: Maybe ignore failures if ignore_remove_failures is set
11594 result.Raise("Could not shutdown instance %s on"
11595 " node %s" % (instance.name, src_node))
11597 # set the disks ID correctly since call_instance_start needs the
11598 # correct drbd minor to create the symlinks
11599 for disk in instance.disks:
11600 self.cfg.SetDiskID(disk, src_node)
11602 activate_disks = (not instance.admin_up)
11605 # Activate the instance disks if we're exporting a stopped instance
11606 feedback_fn("Activating disks for %s" % instance.name)
11607 _StartInstanceDisks(self, instance, None)
11610 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11613 helper.CreateSnapshots()
11615 if (self.op.shutdown and instance.admin_up and
11616 not self.op.remove_instance):
11617 assert not activate_disks
11618 feedback_fn("Starting instance %s" % instance.name)
11619 result = self.rpc.call_instance_start(src_node, instance,
11621 msg = result.fail_msg
11623 feedback_fn("Failed to start instance: %s" % msg)
11624 _ShutdownInstanceDisks(self, instance)
11625 raise errors.OpExecError("Could not start instance: %s" % msg)
11627 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11628 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11629 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11630 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11631 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11633 (key_name, _, _) = self.x509_key_name
11636 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11639 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11640 key_name, dest_ca_pem,
11645 # Check for backwards compatibility
11646 assert len(dresults) == len(instance.disks)
11647 assert compat.all(isinstance(i, bool) for i in dresults), \
11648 "Not all results are boolean: %r" % dresults
11652 feedback_fn("Deactivating disks for %s" % instance.name)
11653 _ShutdownInstanceDisks(self, instance)
11655 if not (compat.all(dresults) and fin_resu):
11658 failures.append("export finalization")
11659 if not compat.all(dresults):
11660 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11662 failures.append("disk export: disk(s) %s" % fdsk)
11664 raise errors.OpExecError("Export failed, errors in %s" %
11665 utils.CommaJoin(failures))
11667 # At this point, the export was successful, we can cleanup/finish
11669 # Remove instance if requested
11670 if self.op.remove_instance:
11671 feedback_fn("Removing instance %s" % instance.name)
11672 _RemoveInstance(self, feedback_fn, instance,
11673 self.op.ignore_remove_failures)
11675 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11676 self._CleanupExports(feedback_fn)
11678 return fin_resu, dresults
11681 class LUBackupRemove(NoHooksLU):
11682 """Remove exports related to the named instance.
11687 def ExpandNames(self):
11688 self.needed_locks = {}
11689 # We need all nodes to be locked in order for RemoveExport to work, but we
11690 # don't need to lock the instance itself, as nothing will happen to it (and
11691 # we can remove exports also for a removed instance)
11692 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11694 def Exec(self, feedback_fn):
11695 """Remove any export.
11698 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11699 # If the instance was not found we'll try with the name that was passed in.
11700 # This will only work if it was an FQDN, though.
11702 if not instance_name:
11704 instance_name = self.op.instance_name
11706 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11707 exportlist = self.rpc.call_export_list(locked_nodes)
11709 for node in exportlist:
11710 msg = exportlist[node].fail_msg
11712 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11714 if instance_name in exportlist[node].payload:
11716 result = self.rpc.call_export_remove(node, instance_name)
11717 msg = result.fail_msg
11719 logging.error("Could not remove export for instance %s"
11720 " on node %s: %s", instance_name, node, msg)
11722 if fqdn_warn and not found:
11723 feedback_fn("Export not found. If trying to remove an export belonging"
11724 " to a deleted instance please use its Fully Qualified"
11728 class LUGroupAdd(LogicalUnit):
11729 """Logical unit for creating node groups.
11732 HPATH = "group-add"
11733 HTYPE = constants.HTYPE_GROUP
11736 def ExpandNames(self):
11737 # We need the new group's UUID here so that we can create and acquire the
11738 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11739 # that it should not check whether the UUID exists in the configuration.
11740 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11741 self.needed_locks = {}
11742 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11744 def CheckPrereq(self):
11745 """Check prerequisites.
11747 This checks that the given group name is not an existing node group
11752 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11753 except errors.OpPrereqError:
11756 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11757 " node group (UUID: %s)" %
11758 (self.op.group_name, existing_uuid),
11759 errors.ECODE_EXISTS)
11761 if self.op.ndparams:
11762 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11764 def BuildHooksEnv(self):
11765 """Build hooks env.
11769 "GROUP_NAME": self.op.group_name,
11772 def BuildHooksNodes(self):
11773 """Build hooks nodes.
11776 mn = self.cfg.GetMasterNode()
11777 return ([mn], [mn])
11779 def Exec(self, feedback_fn):
11780 """Add the node group to the cluster.
11783 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11784 uuid=self.group_uuid,
11785 alloc_policy=self.op.alloc_policy,
11786 ndparams=self.op.ndparams)
11788 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11789 del self.remove_locks[locking.LEVEL_NODEGROUP]
11792 class LUGroupAssignNodes(NoHooksLU):
11793 """Logical unit for assigning nodes to groups.
11798 def ExpandNames(self):
11799 # These raise errors.OpPrereqError on their own:
11800 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11801 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11803 # We want to lock all the affected nodes and groups. We have readily
11804 # available the list of nodes, and the *destination* group. To gather the
11805 # list of "source" groups, we need to fetch node information later on.
11806 self.needed_locks = {
11807 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11808 locking.LEVEL_NODE: self.op.nodes,
11811 def DeclareLocks(self, level):
11812 if level == locking.LEVEL_NODEGROUP:
11813 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11815 # Try to get all affected nodes' groups without having the group or node
11816 # lock yet. Needs verification later in the code flow.
11817 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11819 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11821 def CheckPrereq(self):
11822 """Check prerequisites.
11825 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11826 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11827 frozenset(self.op.nodes))
11829 expected_locks = (set([self.group_uuid]) |
11830 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11831 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11832 if actual_locks != expected_locks:
11833 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11834 " current groups are '%s', used to be '%s'" %
11835 (utils.CommaJoin(expected_locks),
11836 utils.CommaJoin(actual_locks)))
11838 self.node_data = self.cfg.GetAllNodesInfo()
11839 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11840 instance_data = self.cfg.GetAllInstancesInfo()
11842 if self.group is None:
11843 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11844 (self.op.group_name, self.group_uuid))
11846 (new_splits, previous_splits) = \
11847 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11848 for node in self.op.nodes],
11849 self.node_data, instance_data)
11852 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11854 if not self.op.force:
11855 raise errors.OpExecError("The following instances get split by this"
11856 " change and --force was not given: %s" %
11859 self.LogWarning("This operation will split the following instances: %s",
11862 if previous_splits:
11863 self.LogWarning("In addition, these already-split instances continue"
11864 " to be split across groups: %s",
11865 utils.CommaJoin(utils.NiceSort(previous_splits)))
11867 def Exec(self, feedback_fn):
11868 """Assign nodes to a new group.
11871 for node in self.op.nodes:
11872 self.node_data[node].group = self.group_uuid
11874 # FIXME: Depends on side-effects of modifying the result of
11875 # C{cfg.GetAllNodesInfo}
11877 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11880 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11881 """Check for split instances after a node assignment.
11883 This method considers a series of node assignments as an atomic operation,
11884 and returns information about split instances after applying the set of
11887 In particular, it returns information about newly split instances, and
11888 instances that were already split, and remain so after the change.
11890 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11893 @type changes: list of (node_name, new_group_uuid) pairs.
11894 @param changes: list of node assignments to consider.
11895 @param node_data: a dict with data for all nodes
11896 @param instance_data: a dict with all instances to consider
11897 @rtype: a two-tuple
11898 @return: a list of instances that were previously okay and become split as a
11899 consequence of this change, and a list of instances that were previously
11900 split and that this change does not fix.
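For example, moving a node that hosts the primary of a DRBD instance into a
new group, while its secondary stays behind, makes that instance newly
split; an instance already spanning two groups that the move does not
reunite is reported in the second list.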
11903 changed_nodes = dict((node, group) for node, group in changes
11904 if node_data[node].group != group)
11906 all_split_instances = set()
11907 previously_split_instances = set()
11909 def InstanceNodes(instance):
11910 return [instance.primary_node] + list(instance.secondary_nodes)
11912 for inst in instance_data.values():
11913 if inst.disk_template not in constants.DTS_INT_MIRROR:
11916 instance_nodes = InstanceNodes(inst)
11918 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11919 previously_split_instances.add(inst.name)
11921 if len(set(changed_nodes.get(node, node_data[node].group)
11922 for node in instance_nodes)) > 1:
11923 all_split_instances.add(inst.name)
11925 return (list(all_split_instances - previously_split_instances),
11926 list(previously_split_instances & all_split_instances))
11929 class _GroupQuery(_QueryBase):
11930 FIELDS = query.GROUP_FIELDS
11932 def ExpandNames(self, lu):
11933 lu.needed_locks = {}
11935 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11936 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11939 self.wanted = [name_to_uuid[name]
11940 for name in utils.NiceSort(name_to_uuid.keys())]
11942 # Accept names to be either names or UUIDs.
11945 all_uuid = frozenset(self._all_groups.keys())
11947 for name in self.names:
11948 if name in all_uuid:
11949 self.wanted.append(name)
11950 elif name in name_to_uuid:
11951 self.wanted.append(name_to_uuid[name])
11953 missing.append(name)
11956 raise errors.OpPrereqError("Some groups do not exist: %s" %
11957 utils.CommaJoin(missing),
11958 errors.ECODE_NOENT)
11960 def DeclareLocks(self, lu, level):
11963 def _GetQueryData(self, lu):
11964 """Computes the list of node groups and their attributes.
11967 do_nodes = query.GQ_NODE in self.requested_data
11968 do_instances = query.GQ_INST in self.requested_data
11970 group_to_nodes = None
11971 group_to_instances = None
11973 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11974 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11975 # latter GetAllInstancesInfo() is not enough, for we have to go through
11976 # instance->node. Hence, we will need to process nodes even if we only need
11977 # instance information.
11978 if do_nodes or do_instances:
11979 all_nodes = lu.cfg.GetAllNodesInfo()
11980 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11983 for node in all_nodes.values():
11984 if node.group in group_to_nodes:
11985 group_to_nodes[node.group].append(node.name)
11986 node_to_group[node.name] = node.group
11989 all_instances = lu.cfg.GetAllInstancesInfo()
11990 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11992 for instance in all_instances.values():
11993 node = instance.primary_node
11994 if node in node_to_group:
11995 group_to_instances[node_to_group[node]].append(instance.name)
11998 # Do not pass on node information if it was not requested.
11999 group_to_nodes = None
12001 return query.GroupQueryData([self._all_groups[uuid]
12002 for uuid in self.wanted],
12003 group_to_nodes, group_to_instances)
12006 class LUGroupQuery(NoHooksLU):
12007 """Logical unit for querying node groups.
12012 def CheckArguments(self):
12013 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12014 self.op.output_fields, False)
12016 def ExpandNames(self):
12017 self.gq.ExpandNames(self)
12019 def Exec(self, feedback_fn):
12020 return self.gq.OldStyleQuery(self)
12023 class LUGroupSetParams(LogicalUnit):
12024 """Modifies the parameters of a node group.
12027 HPATH = "group-modify"
12028 HTYPE = constants.HTYPE_GROUP
12031 def CheckArguments(self):
12034 self.op.alloc_policy,
12037 if all_changes.count(None) == len(all_changes):
12038 raise errors.OpPrereqError("Please pass at least one modification",
12039 errors.ECODE_INVAL)
12041 def ExpandNames(self):
12042 # This raises errors.OpPrereqError on its own:
12043 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12045 self.needed_locks = {
12046 locking.LEVEL_NODEGROUP: [self.group_uuid],
12049 def CheckPrereq(self):
12050 """Check prerequisites.
12053 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12055 if self.group is None:
12056 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12057 (self.op.group_name, self.group_uuid))
12059 if self.op.ndparams:
12060 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12061 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12062 self.new_ndparams = new_ndparams
12064 def BuildHooksEnv(self):
12065 """Build hooks env.
12069 "GROUP_NAME": self.op.group_name,
12070 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12073 def BuildHooksNodes(self):
12074 """Build hooks nodes.
12077 mn = self.cfg.GetMasterNode()
12078 return ([mn], [mn])
12080 def Exec(self, feedback_fn):
12081 """Modifies the node group.
12086 if self.op.ndparams:
12087 self.group.ndparams = self.new_ndparams
12088 result.append(("ndparams", str(self.group.ndparams)))
12090 if self.op.alloc_policy:
12091 self.group.alloc_policy = self.op.alloc_policy
12093 self.cfg.Update(self.group, feedback_fn)
12098 class LUGroupRemove(LogicalUnit):
12099 HPATH = "group-remove"
12100 HTYPE = constants.HTYPE_GROUP
12103 def ExpandNames(self):
12104 # This will raise errors.OpPrereqError on its own:
12105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12106 self.needed_locks = {
12107 locking.LEVEL_NODEGROUP: [self.group_uuid],
12110 def CheckPrereq(self):
12111 """Check prerequisites.
12113 This checks that the given group name exists as a node group, that it is
12114 empty (i.e., contains no nodes), and that it is not the last group of the
12115 cluster.
12118 # Verify that the group is empty.
12119 group_nodes = [node.name
12120 for node in self.cfg.GetAllNodesInfo().values()
12121 if node.group == self.group_uuid]
12124 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12126 (self.op.group_name,
12127 utils.CommaJoin(utils.NiceSort(group_nodes))),
12128 errors.ECODE_STATE)
12130 # Verify the cluster would not be left group-less.
12131 if len(self.cfg.GetNodeGroupList()) == 1:
12132 raise errors.OpPrereqError("Group '%s' is the only group,"
12133 " cannot be removed" %
12134 self.op.group_name,
12135 errors.ECODE_STATE)
12137 def BuildHooksEnv(self):
12138 """Build hooks env.
12142 "GROUP_NAME": self.op.group_name,
12145 def BuildHooksNodes(self):
12146 """Build hooks nodes.
12149 mn = self.cfg.GetMasterNode()
12150 return ([mn], [mn])
12152 def Exec(self, feedback_fn):
12153 """Remove the node group.
12157 self.cfg.RemoveNodeGroup(self.group_uuid)
12158 except errors.ConfigurationError:
12159 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12160 (self.op.group_name, self.group_uuid))
12162 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12165 class LUGroupRename(LogicalUnit):
12166 HPATH = "group-rename"
12167 HTYPE = constants.HTYPE_GROUP
12170 def ExpandNames(self):
12171 # This raises errors.OpPrereqError on its own:
12172 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12174 self.needed_locks = {
12175 locking.LEVEL_NODEGROUP: [self.group_uuid],
12178 def CheckPrereq(self):
12179 """Check prerequisites.
12181 Ensures requested new name is not yet used.
12185 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12186 except errors.OpPrereqError:
12189 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12190 " node group (UUID: %s)" %
12191 (self.op.new_name, new_name_uuid),
12192 errors.ECODE_EXISTS)
12194 def BuildHooksEnv(self):
12195 """Build hooks env.
12199 "OLD_NAME": self.op.group_name,
12200 "NEW_NAME": self.op.new_name,
12203 def BuildHooksNodes(self):
12204 """Build hooks nodes.
12207 mn = self.cfg.GetMasterNode()
12209 all_nodes = self.cfg.GetAllNodesInfo()
12210 all_nodes.pop(mn, None)
12213 run_nodes.extend(node.name for node in all_nodes.values()
12214 if node.group == self.group_uuid)
12216 return (run_nodes, run_nodes)
12218 def Exec(self, feedback_fn):
12219 """Rename the node group.
12222 group = self.cfg.GetNodeGroup(self.group_uuid)
12225 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12226 (self.op.group_name, self.group_uuid))
12228 group.name = self.op.new_name
12229 self.cfg.Update(group, feedback_fn)
12231 return self.op.new_name
12234 class LUGroupEvacuate(LogicalUnit):
12235 HPATH = "group-evacuate"
12236 HTYPE = constants.HTYPE_GROUP
12239 def ExpandNames(self):
12240 # This raises errors.OpPrereqError on its own:
12241 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12243 if self.op.target_groups:
12244 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12245 self.op.target_groups)
12247 self.req_target_uuids = []
12249 if self.group_uuid in self.req_target_uuids:
12250 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12251 " as a target group (targets are %s)" %
12253 utils.CommaJoin(self.req_target_uuids)),
12254 errors.ECODE_INVAL)
12256 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12258 self.share_locks = _ShareAll()
12259 self.needed_locks = {
12260 locking.LEVEL_INSTANCE: [],
12261 locking.LEVEL_NODEGROUP: [],
12262 locking.LEVEL_NODE: [],
12265 def DeclareLocks(self, level):
12266 if level == locking.LEVEL_INSTANCE:
12267 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12269 # Lock instances optimistically, needs verification once node and group
12270 # locks have been acquired
12271 self.needed_locks[locking.LEVEL_INSTANCE] = \
12272 self.cfg.GetNodeGroupInstances(self.group_uuid)
12274 elif level == locking.LEVEL_NODEGROUP:
12275 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12277 if self.req_target_uuids:
12278 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12280 # Lock all groups used by instances optimistically; this requires going
12281 # via the node before it's locked, requiring verification later on
12282 lock_groups.update(group_uuid
12283 for instance_name in
12284 self.owned_locks(locking.LEVEL_INSTANCE)
12286 self.cfg.GetInstanceNodeGroups(instance_name))
12288 # No target groups, need to lock all of them
12289 lock_groups = locking.ALL_SET
12291 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12293 elif level == locking.LEVEL_NODE:
12294 # This will only lock the nodes in the group to be evacuated which
12295 # contain actual instances
12296 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12297 self._LockInstancesNodes()
12299 # Lock all nodes in group to be evacuated and target groups
12300 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12301 assert self.group_uuid in owned_groups
12302 member_nodes = [node_name
12303 for group in owned_groups
12304 for node_name in self.cfg.GetNodeGroup(group).members]
12305 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12307 def CheckPrereq(self):
12308 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12309 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12310 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12312 assert owned_groups.issuperset(self.req_target_uuids)
12313 assert self.group_uuid in owned_groups
12315 # Check if locked instances are still correct
12316 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12318 # Get instance information
12319 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12321 # Check if node groups for locked instances are still correct
12322 for instance_name in owned_instances:
12323 inst = self.instances[instance_name]
12324 assert owned_nodes.issuperset(inst.all_nodes), \
12325 "Instance %s's nodes changed while we kept the lock" % instance_name
12327 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12330 assert self.group_uuid in inst_groups, \
12331 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12333 if self.req_target_uuids:
12334 # User requested specific target groups
12335 self.target_uuids = self.req_target_uuids
12337 # All groups except the one to be evacuated are potential targets
12338 self.target_uuids = [group_uuid for group_uuid in owned_groups
12339 if group_uuid != self.group_uuid]
12341 if not self.target_uuids:
12342 raise errors.OpPrereqError("There are no possible target groups",
12343 errors.ECODE_INVAL)
12345 def BuildHooksEnv(self):
12346 """Build hooks env.
12350 "GROUP_NAME": self.op.group_name,
12351 "TARGET_GROUPS": " ".join(self.target_uuids),
12354 def BuildHooksNodes(self):
12355 """Build hooks nodes.
12358 mn = self.cfg.GetMasterNode()
12360 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12362 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12364 return (run_nodes, run_nodes)
12366 def Exec(self, feedback_fn):
12367 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12369 assert self.group_uuid not in self.target_uuids
12371 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12372 instances=instances, target_groups=self.target_uuids)
12374 ial.Run(self.op.iallocator)
12376 if not ial.success:
12377 raise errors.OpPrereqError("Can't compute group evacuation using"
12378 " iallocator '%s': %s" %
12379 (self.op.iallocator, ial.info),
12380 errors.ECODE_NORES)
12382 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12384 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12385 len(jobs), self.op.group_name)
12387 return ResultWithJobs(jobs)
12390 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12391 """Generic tags LU.
12393 This is an abstract class which is the parent of all the other tags LUs.
12396 def ExpandNames(self):
12397 self.group_uuid = None
12398 self.needed_locks = {}
12399 if self.op.kind == constants.TAG_NODE:
12400 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12401 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12402 elif self.op.kind == constants.TAG_INSTANCE:
12403 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12404 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12405 elif self.op.kind == constants.TAG_NODEGROUP:
12406 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12408 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12409 # not possible to acquire the BGL based on opcode parameters)
12411 def CheckPrereq(self):
12412 """Check prerequisites.
12415 if self.op.kind == constants.TAG_CLUSTER:
12416 self.target = self.cfg.GetClusterInfo()
12417 elif self.op.kind == constants.TAG_NODE:
12418 self.target = self.cfg.GetNodeInfo(self.op.name)
12419 elif self.op.kind == constants.TAG_INSTANCE:
12420 self.target = self.cfg.GetInstanceInfo(self.op.name)
12421 elif self.op.kind == constants.TAG_NODEGROUP:
12422 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12424 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12425 str(self.op.kind), errors.ECODE_INVAL)
12428 class LUTagsGet(TagsLU):
12429 """Returns the tags of a given object.
12434 def ExpandNames(self):
12435 TagsLU.ExpandNames(self)
12437 # Share locks as this is only a read operation
12438 self.share_locks = _ShareAll()
12440 def Exec(self, feedback_fn):
12441 """Returns the tag list.
12444 return list(self.target.GetTags())
12447 class LUTagsSearch(NoHooksLU):
12448 """Searches the tags for a given pattern.
12453 def ExpandNames(self):
12454 self.needed_locks = {}
12456 def CheckPrereq(self):
12457 """Check prerequisites.
12459 This checks the pattern passed for validity by compiling it.
12463 self.re = re.compile(self.op.pattern)
12464 except re.error, err:
12465 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12466 (self.op.pattern, err), errors.ECODE_INVAL)
12468 def Exec(self, feedback_fn):
12469 """Returns the tag list.
12473 tgts = [("/cluster", cfg.GetClusterInfo())]
12474 ilist = cfg.GetAllInstancesInfo().values()
12475 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12476 nlist = cfg.GetAllNodesInfo().values()
12477 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12478 tgts.extend(("/nodegroup/%s" % n.name, n)
12479 for n in cfg.GetAllNodeGroupsInfo().values())
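# each entry in tgts is a (path, taggable object) pair, e.g.
# ("/instances/inst1.example.com", <Instance object>); any tag matching the
# pattern is collected as a (path, tag) tuple ("inst1.example.com" is just an
# example name)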
12481 for path, target in tgts:
12482 for tag in target.GetTags():
12483 if self.re.search(tag):
12484 results.append((path, tag))
12488 class LUTagsSet(TagsLU):
12489 """Sets a tag on a given object.
12494 def CheckPrereq(self):
12495 """Check prerequisites.
12497 This checks the type and length of the tag name and value.
12500 TagsLU.CheckPrereq(self)
12501 for tag in self.op.tags:
12502 objects.TaggableObject.ValidateTag(tag)
12504 def Exec(self, feedback_fn):
12509 for tag in self.op.tags:
12510 self.target.AddTag(tag)
12511 except errors.TagError, err:
12512 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12513 self.cfg.Update(self.target, feedback_fn)
12516 class LUTagsDel(TagsLU):
12517 """Delete a list of tags from a given object.
12522 def CheckPrereq(self):
12523 """Check prerequisites.
12525 This checks that we have the given tag.
12528 TagsLU.CheckPrereq(self)
12529 for tag in self.op.tags:
12530 objects.TaggableObject.ValidateTag(tag)
12531 del_tags = frozenset(self.op.tags)
12532 cur_tags = self.target.GetTags()
12534 diff_tags = del_tags - cur_tags
12536 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12537 raise errors.OpPrereqError("Tag(s) %s not found" %
12538 (utils.CommaJoin(diff_names), ),
12539 errors.ECODE_NOENT)
12541 def Exec(self, feedback_fn):
12542 """Remove the tag from the object.
12545 for tag in self.op.tags:
12546 self.target.RemoveTag(tag)
12547 self.cfg.Update(self.target, feedback_fn)
12550 class LUTestDelay(NoHooksLU):
12551 """Sleep for a specified amount of time.
12553 This LU sleeps on the master and/or nodes for a specified amount of
12554 time.
12559 def ExpandNames(self):
12560 """Expand names and set required locks.
12562 This expands the node list, if any.
12565 self.needed_locks = {}
12566 if self.op.on_nodes:
12567 # _GetWantedNodes can be used here, but is not always appropriate to use
12568 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12569 # more information.
12570 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12571 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12573 def _TestDelay(self):
12574 """Do the actual sleep.
12577 if self.op.on_master:
12578 if not utils.TestDelay(self.op.duration):
12579 raise errors.OpExecError("Error during master delay test")
12580 if self.op.on_nodes:
12581 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12582 for node, node_result in result.items():
12583 node_result.Raise("Failure during rpc call to node %s" % node)
12585 def Exec(self, feedback_fn):
12586 """Execute the test delay opcode, with the wanted repetitions.
12589 if self.op.repeat == 0:
12592 top_value = self.op.repeat - 1
12593 for i in range(self.op.repeat):
12594 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12598 class LUTestJqueue(NoHooksLU):
12599 """Utility LU to test some aspects of the job queue.
12604 # Must be lower than default timeout for WaitForJobChange to see whether it
12605 # notices changed jobs
12606 _CLIENT_CONNECT_TIMEOUT = 20.0
12607 _CLIENT_CONFIRM_TIMEOUT = 60.0
12610 def _NotifyUsingSocket(cls, cb, errcls):
12611 """Opens a Unix socket and waits for another program to connect.
12614 @param cb: Callback to send socket name to client
12615 @type errcls: class
12616 @param errcls: Exception class to use for errors
12619 # Using a temporary directory as there's no easy way to create temporary
12620 # sockets without writing a custom loop around tempfile.mktemp and
12622 tmpdir = tempfile.mkdtemp()
12624 tmpsock = utils.PathJoin(tmpdir, "sock")
12626 logging.debug("Creating temporary socket at %s", tmpsock)
12627 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12632 # Send details to client
12635 # Wait for client to connect before continuing
12636 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12638 (conn, _) = sock.accept()
12639 except socket.error, err:
12640 raise errcls("Client didn't connect in time (%s)" % err)
12644 # Remove as soon as client is connected
12645 shutil.rmtree(tmpdir)
12647 # Wait for client to close
12650 # pylint: disable-msg=E1101
12651 # Instance of '_socketobject' has no ... member
12652 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12654 except socket.error, err:
12655 raise errcls("Client failed to confirm notification (%s)" % err)
12659 def _SendNotification(self, test, arg, sockname):
12660 """Sends a notification to the client.
12663 @param test: Test name
12664 @param arg: Test argument (depends on test)
12665 @type sockname: string
12666 @param sockname: Socket path
12669 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12671 def _Notify(self, prereq, test, arg):
12672 """Notifies the client of a test.
12675 @param prereq: Whether this is a prereq-phase test
12677 @param test: Test name
12678 @param arg: Test argument (depends on test)
12682 errcls = errors.OpPrereqError
12684 errcls = errors.OpExecError
12686 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12690 def CheckArguments(self):
12691 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12692 self.expandnames_calls = 0
12694 def ExpandNames(self):
12695 checkargs_calls = getattr(self, "checkargs_calls", 0)
12696 if checkargs_calls < 1:
12697 raise errors.ProgrammerError("CheckArguments was not called")
12699 self.expandnames_calls += 1
12701 if self.op.notify_waitlock:
12702 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12704 self.LogInfo("Expanding names")
12706 # Get lock on master node (just to get a lock, not for a particular reason)
12707 self.needed_locks = {
12708 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12711 def Exec(self, feedback_fn):
12712 if self.expandnames_calls < 1:
12713 raise errors.ProgrammerError("ExpandNames was not called")
12715 if self.op.notify_exec:
12716 self._Notify(False, constants.JQT_EXEC, None)
12718 self.LogInfo("Executing")
12720 if self.op.log_messages:
12721 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12722 for idx, msg in enumerate(self.op.log_messages):
12723 self.LogInfo("Sending log message %s", idx + 1)
12724 feedback_fn(constants.JQT_MSGPREFIX + msg)
12725 # Report how many test messages have been sent
12726 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12729 raise errors.OpExecError("Opcode failure was requested")
12734 class IAllocator(object):
12735 """IAllocator framework.
12737 An IAllocator instance has four sets of attributes:
12738 - cfg that is needed to query the cluster
12739 - input data (all members of the _KEYS class attribute are required)
12740 - four buffer attributes (in|out_data|text), that represent the
12741 input (to the external script) in text and data structure format,
12742 and the output from it, again in two formats
12743 - the result variables from the script (success, info, nodes) for
12747 # pylint: disable-msg=R0902
12748 # lots of instance attributes
12750 def __init__(self, cfg, rpc, mode, **kwargs):
12753 # init buffer variables
12754 self.in_text = self.out_text = self.in_data = self.out_data = None
12755 # init all input fields so that pylint is happy
12757 self.memory = self.disks = self.disk_template = None
12758 self.os = self.tags = self.nics = self.vcpus = None
12759 self.hypervisor = None
12760 self.relocate_from = None
12762 self.instances = None
12763 self.evac_mode = None
12764 self.target_groups = []
12766 self.required_nodes = None
12767 # init result fields
12768 self.success = self.info = self.result = None
12771 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12773 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12774 " IAllocator" % self.mode)
12776 keyset = [n for (n, _) in keydata]
12779 if key not in keyset:
12780 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12781 " IAllocator" % key)
12782 setattr(self, key, kwargs[key])
12785 if key not in kwargs:
12786 raise errors.ProgrammerError("Missing input parameter '%s' to"
12787 " IAllocator" % key)
12788 self._BuildInputData(compat.partial(fn, self), keydata)
12790 def _ComputeClusterData(self):
12791 """Compute the generic allocator input data.
12793 This is the data that is independent of the actual operation.
12797 cluster_info = cfg.GetClusterInfo()
12800 "version": constants.IALLOCATOR_VERSION,
12801 "cluster_name": cfg.GetClusterName(),
12802 "cluster_tags": list(cluster_info.GetTags()),
12803 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12804 # we don't have job IDs
12806 ninfo = cfg.GetAllNodesInfo()
12807 iinfo = cfg.GetAllInstancesInfo().values()
12808 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12811 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12813 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12814 hypervisor_name = self.hypervisor
12815 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12816 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12818 hypervisor_name = cluster_info.enabled_hypervisors[0]
12820 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12823 self.rpc.call_all_instances_info(node_list,
12824 cluster_info.enabled_hypervisors)
12826 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12828 config_ndata = self._ComputeBasicNodeData(ninfo)
12829 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12830 i_list, config_ndata)
12831 assert len(data["nodes"]) == len(ninfo), \
12832 "Incomplete node data computed"
12834 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12836 self.in_data = data
12839 def _ComputeNodeGroupData(cfg):
12840 """Compute node groups data.
12843 ng = dict((guuid, {
12844 "name": gdata.name,
12845 "alloc_policy": gdata.alloc_policy,
12847 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12852 def _ComputeBasicNodeData(node_cfg):
12853 """Compute global node data.
12856 @returns: a dict of name: (node dict, node config)
12859 # fill in static (config-based) values
12860 node_results = dict((ninfo.name, {
12861 "tags": list(ninfo.GetTags()),
12862 "primary_ip": ninfo.primary_ip,
12863 "secondary_ip": ninfo.secondary_ip,
12864 "offline": ninfo.offline,
12865 "drained": ninfo.drained,
12866 "master_candidate": ninfo.master_candidate,
12867 "group": ninfo.group,
12868 "master_capable": ninfo.master_capable,
12869 "vm_capable": ninfo.vm_capable,
12871 for ninfo in node_cfg.values())
12873 return node_results
12876 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12878 """Compute global node data.
12880 @param node_results: the basic node structures as filled from the config
12883 # make a copy of the current dict
12884 node_results = dict(node_results)
12885 for nname, nresult in node_data.items():
12886 assert nname in node_results, "Missing basic data for node %s" % nname
12887 ninfo = node_cfg[nname]
12889 if not (ninfo.offline or ninfo.drained):
12890 nresult.Raise("Can't get data for node %s" % nname)
12891 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12893 remote_info = nresult.payload
12895 for attr in ["memory_total", "memory_free", "memory_dom0",
12896 "vg_size", "vg_free", "cpu_total"]:
12897 if attr not in remote_info:
12898 raise errors.OpExecError("Node '%s' didn't return attribute"
12899 " '%s'" % (nname, attr))
12900 if not isinstance(remote_info[attr], int):
12901 raise errors.OpExecError("Node '%s' returned invalid value"
12903 (nname, attr, remote_info[attr]))
12904 # compute memory used by primary instances
12905 i_p_mem = i_p_up_mem = 0
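# i_p_mem sums the configured memory of all instances whose primary node is
# this node; i_p_up_mem counts only those that are expected to be up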
12906 for iinfo, beinfo in i_list:
12907 if iinfo.primary_node == nname:
12908 i_p_mem += beinfo[constants.BE_MEMORY]
12909 if iinfo.name not in node_iinfo[nname].payload:
12912 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12913 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12914 remote_info["memory_free"] -= max(0, i_mem_diff)
12917 i_p_up_mem += beinfo[constants.BE_MEMORY]
12919 # compute memory used by instances
12921 "total_memory": remote_info["memory_total"],
12922 "reserved_memory": remote_info["memory_dom0"],
12923 "free_memory": remote_info["memory_free"],
12924 "total_disk": remote_info["vg_size"],
12925 "free_disk": remote_info["vg_free"],
12926 "total_cpus": remote_info["cpu_total"],
12927 "i_pri_memory": i_p_mem,
12928 "i_pri_up_memory": i_p_up_mem,
12930 pnr_dyn.update(node_results[nname])
12931 node_results[nname] = pnr_dyn
12933 return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
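
  # Sketch of one instance entry (all values are hypothetical examples); the
  # keys mirror the dict built above:
  #   instance_data["inst1.example.com"] = {
  #     "tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
  #     "os": "debootstrap+default", "nodes": ["node1", "node2"],
  #     "nics": [{"mac": "aa:00:00:36:33:f1", "ip": None, "mode": "bridged",
  #               "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 1024, "mode": "rw"}], "disk_template": "drbd",
  #     "hypervisor": "xen-pvm", "disk_space_total": ...,
  #     }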

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
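
  # A hedged example of the allocation request assembled here (all values are
  # hypothetical); once _BuildInputData adds the "type" key it would look
  # roughly like
  #   {"type": "allocate", "name": "inst1.example.com",
  #    "disk_template": "drbd", "tags": [], "os": "debootstrap+default",
  #    "vcpus": 1, "memory": 512, "disks": [{"size": 1024, "mode": "rw"}],
  #    "disk_space_total": ..., "nics": [{...}], "required_nodes": 2,
  #    "hypervisor": "xen-pvm"}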

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

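  # Relocation requests are much smaller; a hypothetical example (made-up
  # values) once the "type" key has been added by _BuildInputData:
  #   {"type": "relocate", "name": "inst1.example.com",
  #    "disk_space_total": 1024, "required_nodes": 1,
  #    "relocate_from": ["node2.example.com"]}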

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
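
  # The serialized text handed to the iallocator script thus contains the
  # cluster data gathered by _ComputeClusterData() plus the per-mode "request"
  # dict; a condensed, hypothetical sketch of self.in_data:
  #   {"nodes": {...}, "nodegroups": {...}, "instances": {...},
  #    "request": {"type": "allocate", ...}}
  # ("nodes" and "nodegroups" are also what _ValidateResult consults below.)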

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
    # pylint: disable-msg=E1101
    # Class '...' has no 'OP_ID' member
    "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                         opcodes.OpInstanceMigrate.OP_ID,
                         opcodes.OpInstanceReplaceDisks.OP_ID])
    })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
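
  # Each _MODE_DATA entry is a (request builder, expected request keys/types,
  # result check) triple: the builder and key list are consumed by
  # _BuildInputData above, and the check corresponds to the self._result_check
  # that _ValidateResult applies to the allocator's "result" field below.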

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      request_groups = fn(self.relocate_from)
      result_groups = fn(rdict["result"])

      if self.success and result_groups != request_groups:
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
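
  # A well-formed allocator reply, as enforced above, is a dict with at least
  # the "success", "info" and "result" keys; a minimal hypothetical example:
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # (shown as JSON; serializer.Load() turns it into the Python dict rdict)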

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
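
  # Example behaviour of the helper above (hypothetical inputs):
  #   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["node1", "node2", "unknown"])
  # returns ["default", "uuid-b"]: unknown nodes are skipped, and groups
  # without an entry in the group dict fall back to their UUID.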


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
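
  # In other words: with direction IALLOCATOR_DIR_IN the LU only returns the
  # generated allocator input (ial.in_text), while any other direction (i.e.
  # IALLOCATOR_DIR_OUT, per CheckPrereq) runs the named allocator script and
  # returns its raw, unvalidated output.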


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
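
# Usage sketch: _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class from the mapping above, while an unknown resource name
# raises OpPrereqError instead of leaking a KeyError to the caller.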