4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80     @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
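# Illustrative sketch (not part of the original module): an LU's Exec method
# could hand follow-up work to the job queue roughly like this, assuming an
# opcode such as opcodes.OpInstanceStartup taking an instance_name parameter:
#
#   def Exec(self, feedback_fn):
#     ops = [opcodes.OpInstanceStartup(instance_name=self.op.instance_name)]
#     return ResultWithJobs([ops], warning="instance started asynchronously")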
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
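  # A minimal sketch of a concrete LU following the rules above (the class
  # name and hook path are illustrative, not taken from this module):
  #
  #   class LUExampleNoop(LogicalUnit):
  #     HPATH = "example-noop"
  #     HTYPE = constants.HTYPE_CLUSTER
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}
  #
  #     def CheckPrereq(self):
  #       pass
  #
  #     def BuildHooksEnv(self):
  #       return {"OP_TARGET": self.cfg.GetClusterName()}
  #
  #     def BuildHooksNodes(self):
  #       return ([], [self.cfg.GetMasterNode()])
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("nothing to do")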
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
156     This method is for doing a simple syntactic check and ensuring
157     validity of opcode parameters, without any cluster-related
158     checks. While the same can be accomplished in ExpandNames and/or
159     CheckPrereq, doing these separately is better because:
161       - ExpandNames is left as a purely lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166     later methods need not worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
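  # Sketch of a shared-lock declaration (illustrative): a read-only LU can ask
  # for all node locks in shared mode so it does not serialise on other LUs:
  #
  #   def ExpandNames(self):
  #     self.share_locks = _ShareAll()
  #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}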
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
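  # Typical usage sketch (mirroring the _LockInstancesNodes example further
  # down): an LU that locked instances in ExpandNames can compute its node
  # locks once the instance locks are held:
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()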
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296       hook should run after the execution. If no nodes are needed, return an
297       empty list (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316     @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323     # API must be kept, thus we ignore the unused-argument and
324     # "could be a function" warnings
325 # pylint: disable=W0613,R0201
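  # Sketch of an LU overriding this callback (illustrative): summarise the
  # post-phase hook results for the caller and pass the previous Exec result
  # through unchanged:
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       feedback_fn("post hooks ran on %d node(s)" % len(hook_results))
  #     return lu_result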
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
361     It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373     # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404     """Empty BuildHooksEnv for NoHooksLU.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422   they can mix legacy code with tasklets. Locking needs to be done in the LU;
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438     """Check prerequisites for this tasklet.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
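# Minimal tasklet sketch (illustrative only), combining the rules above with
# the tasklet list mentioned in LogicalUnit.ExpandNames:
#
#   class _ExampleTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("tasklet work happens here")
#
#   # ...and inside some LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self)]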
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576                                " '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
632   @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
650   @raise errors.OpPrereqError: if the instances parameter is of the wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
671   @type use_default: boolean
672   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673       values as 'to be deleted' values
674   @type use_none: boolean
675   @param use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
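# Usage sketch with illustrative values: resetting one parameter to its
# default (it is removed from the result) while updating another:
#
#   old = {"kernel_path": "/boot/vmlinuz", "serial_console": True}
#   new = _GetUpdatedParams(old, {"kernel_path": constants.VALUE_DEFAULT,
#                                 "serial_console": False})
#   # new == {"serial_console": False}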
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
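# Usage sketch (illustrative): keep only the node locks still needed for one
# instance and release every other node-level lock held by the LU:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=self.cfg.GetInstanceInfo(instance_name).all_nodes)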
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
786 This will ensure that instances don't get customised versions of
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846   @raise errors.OpPrereqError: if the node does not support the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
852 prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
879 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
881 raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
888 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instance."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938   """Builds instance-related env variables for hooks.
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055   """Builds instance-related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_should by one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
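# Worked example (illustrative numbers): with candidate_pool_size=10, 7
# current candidates and 8 desired, adding this node gives
# mc_should = min(8 + 1, 10) = 9, and since 7 < 9 the node promotes itself.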
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121   """Check that the bridges needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135   """Check that the bridges needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242     raise errors.OpPrereqError("Do not specify both iallocator and node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_deactivate_master_ip(master)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
1402   @type instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
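# Example of returned entries (names and hypervisor are illustrative):
#   [("cluster", "xen-pvm", {...}),
#    ("os debootstrap", "xen-pvm", {...}),
#    ("instance inst1.example.com", "xen-pvm", {...})]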
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
1438 TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
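  # Example of the two output forms (node name and message are illustrative):
  #   parseable: "ERROR:ENODELVM:node:node1.example.com:Can't get PV list from node"
  #   plain:     "ERROR: node node1.example.com: Can't get PV list from node"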
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerify(NoHooksLU):
1514 """Submits all jobs necessary to verify the cluster.
1519 def ExpandNames(self):
1520 self.needed_locks = {}
1522 def Exec(self, feedback_fn):
1525 if self.op.group_name:
1526 groups = [self.op.group_name]
1527 depends_fn = lambda: None
1529 groups = self.cfg.GetNodeGroupList()
1531 # Verify global configuration
1532 jobs.append([opcodes.OpClusterVerifyConfig()])
1534 # Always depend on global verification
1535 depends_fn = lambda: [(-len(jobs), [])]
1537 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538 depends=depends_fn())]
1539 for group in groups)
1541 # Fix up all parameters
1542 for op in itertools.chain(*jobs): # pylint: disable=W0142
1543 op.debug_simulate_errors = self.op.debug_simulate_errors
1544 op.verbose = self.op.verbose
1545 op.error_codes = self.op.error_codes
1547 op.skip_checks = self.op.skip_checks
1548 except AttributeError:
1549 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1551 return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
1575 # Information can be safely retrieved as the BGL is acquired in exclusive
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584     """Verify integrity of cluster, performing various tests on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632                   "the following nodes (and their instances) belong to a"
1633                   " non-existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
1668     @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
1727 # extra call to their secondaries. We ensure here those nodes will
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
1800          reasonable values in the response)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
1923 # special character for lvcreate (denotes the range of PEs to
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999 This function checks to see if the required block devices are
2000 available on the instance's node.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if any single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all the instances it is
2076 # expected to take over, should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accomodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
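# Worked example (illustrative only): if this node is secondary for two
# auto-balanced instances whose primary is prinode, with BE_MEMORY values of
# 2048 and 1024 MiB, then needed_mem is 3072 MiB; with n_img.mfree at
# 2500 MiB the ENODEN1 error above fires for that (node, prinode) pair.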
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2113 assert master_node in node_names
2114 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2115 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2116 "Found file listed in more than one file list"
2118 # Define functions determining which nodes to consider for a file
2119 file2nodefn = dict([(filename, fn)
2120 for (files, fn) in [(files_all, None),
2121 (files_all_opt, None),
2122 (files_mc, lambda node: (node.master_candidate or
2123 node.name == master_node)),
2124 (files_vm, lambda node: node.vm_capable)]
2125 for filename in files])
2127 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2129 for node in nodeinfo:
2133 nresult = all_nvinfo[node.name]
2135 if nresult.fail_msg or not nresult.payload:
2138 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2140 test = not (node_files and isinstance(node_files, dict))
2141 errorif(test, cls.ENODEFILECHECK, node.name,
2142 "Node did not return file checksum data")
2146 for (filename, checksum) in node_files.items():
2147 # Check if the file should be considered for a node
2148 fn = file2nodefn[filename]
2149 if fn is None or fn(node):
2150 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2152 for (filename, checksums) in fileinfo.items():
2153 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2155 # Nodes having the file
2156 with_file = frozenset(node_name
2157 for nodes in fileinfo[filename].values()
2158 for node_name in nodes)
2160 # Nodes missing file
2161 missing_file = node_names - with_file
2163 if filename in files_all_opt:
2165 errorif(missing_file and missing_file != node_names,
2166 cls.ECLUSTERFILECHECK, None,
2167 "File %s is optional, but it must exist on all or no"
2168 " nodes (not found on %s)",
2169 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2171 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2172 "File %s is missing from node(s) %s", filename,
2173 utils.CommaJoin(utils.NiceSort(missing_file)))
2175 # See if there are multiple versions of the file
2176 test = len(checksums) > 1
2178 variants = ["variant %s on %s" %
2179 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2180 for (idx, (checksum, nodes)) in
2181 enumerate(sorted(checksums.items()))]
2185 errorif(test, cls.ECLUSTERFILECHECK, None,
2186 "File %s found with %s different checksums (%s)",
2187 filename, len(checksums), "; ".join(variants))
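# Illustrative note: fileinfo ends up as a nested mapping of
# filename -> checksum -> set of node names, e.g.
#   {"/etc/hosts": {"abc...": set(["node1", "node2"]),
#                   "def...": set(["node3"])}}
# which is what drives the missing-file and multiple-checksum reports above.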
2189 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2191 """Verifies and the node DRBD status.
2193 @type ninfo: L{objects.Node}
2194 @param ninfo: the node to check
2195 @param nresult: the remote results for the node
2196 @param instanceinfo: the dict of instances
2197 @param drbd_helper: the configured DRBD usermode helper
2198 @param drbd_map: the DRBD map as returned by
2199 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2203 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2207 test = (helper_result is None)
2208 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2209 "no drbd usermode helper returned")
2211 status, payload = helper_result
2213 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2214 "drbd usermode helper check unsuccessful: %s", payload)
2215 test = status and (payload != drbd_helper)
2216 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2217 "wrong drbd usermode helper: %s", payload)
2219 # compute the DRBD minors
2221 for minor, instance in drbd_map[node].items():
2222 test = instance not in instanceinfo
2223 _ErrorIf(test, self.ECLUSTERCFG, None,
2224 "ghost instance '%s' in temporary DRBD map", instance)
2225 # ghost instance should not be running, but otherwise we
2226 # don't give double warnings (both ghost instance and
2227 # unallocated minor in use)
2229 node_drbd[minor] = (instance, False)
2231 instance = instanceinfo[instance]
2232 node_drbd[minor] = (instance.name, instance.admin_up)
2234 # and now check them
2235 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2236 test = not isinstance(used_minors, (tuple, list))
2237 _ErrorIf(test, self.ENODEDRBD, node,
2238 "cannot parse drbd status file: %s", str(used_minors))
2240 # we cannot check drbd status
2243 for minor, (iname, must_exist) in node_drbd.items():
2244 test = minor not in used_minors and must_exist
2245 _ErrorIf(test, self.ENODEDRBD, node,
2246 "drbd minor %d of instance %s is not active", minor, iname)
2247 for minor in used_minors:
2248 test = minor not in node_drbd
2249 _ErrorIf(test, self.ENODEDRBD, node,
2250 "unallocated drbd minor %d is in use", minor)
2252 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2253 """Builds the node OS structures.
2255 @type ninfo: L{objects.Node}
2256 @param ninfo: the node to check
2257 @param nresult: the remote results for the node
2258 @param nimg: the node image object
2262 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264 remote_os = nresult.get(constants.NV_OSLIST, None)
2265 test = (not isinstance(remote_os, list) or
2266 not compat.all(isinstance(v, list) and len(v) == 7
2267 for v in remote_os))
2269 _ErrorIf(test, self.ENODEOS, node,
2270 "node hasn't returned valid OS data")
2279 for (name, os_path, status, diagnose,
2280 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2282 if name not in os_dict:
2285 # parameters is a list of lists instead of list of tuples due to
2286 # JSON lacking a real tuple type, fix it:
2287 parameters = [tuple(v) for v in parameters]
2288 os_dict[name].append((os_path, status, diagnose,
2289 set(variants), set(parameters), set(api_ver)))
2291 nimg.oslist = os_dict
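# Illustrative note: nimg.oslist maps an OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples, one
# per location where the OS was found on the node; normally the list has a
# single entry, and _VerifyNodeOS below warns when it does not.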
2293 def _VerifyNodeOS(self, ninfo, nimg, base):
2294 """Verifies the node OS list.
2296 @type ninfo: L{objects.Node}
2297 @param ninfo: the node to check
2298 @param nimg: the node image object
2299 @param base: the 'template' node we match against (e.g. from the master)
2303 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2305 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2307 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2308 for os_name, os_data in nimg.oslist.items():
2309 assert os_data, "Empty OS status for OS %s?!" % os_name
2310 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2311 _ErrorIf(not f_status, self.ENODEOS, node,
2312 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2313 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2314 "OS '%s' has multiple entries (first one shadows the rest): %s",
2315 os_name, utils.CommaJoin([v[0] for v in os_data]))
2316 # comparisons with the 'base' image
2317 test = os_name not in base.oslist
2318 _ErrorIf(test, self.ENODEOS, node,
2319 "Extra OS %s not present on reference node (%s)",
2323 assert base.oslist[os_name], "Base node has empty OS status?"
2324 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2326 # base OS is invalid, skipping
2328 for kind, a, b in [("API version", f_api, b_api),
2329 ("variants list", f_var, b_var),
2330 ("parameters", beautify_params(f_param),
2331 beautify_params(b_param))]:
2332 _ErrorIf(a != b, self.ENODEOS, node,
2333 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2334 kind, os_name, base.name,
2335 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2337 # check any missing OSes
2338 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2339 _ErrorIf(missing, self.ENODEOS, node,
2340 "OSes present on reference node %s but missing on this node: %s",
2341 base.name, utils.CommaJoin(missing))
2343 def _VerifyOob(self, ninfo, nresult):
2344 """Verifies out of band functionality of a node.
2346 @type ninfo: L{objects.Node}
2347 @param ninfo: the node to check
2348 @param nresult: the remote results for the node
2352 # We just have to verify the paths on master and/or master candidates
2353 # as the oob helper is invoked on the master
2354 if ((ninfo.master_candidate or ninfo.master_capable) and
2355 constants.NV_OOB_PATHS in nresult):
2356 for path_result in nresult[constants.NV_OOB_PATHS]:
2357 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2359 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2360 """Verifies and updates the node volume data.
2362 This function will update a L{NodeImage}'s internal structures
2363 with data from the remote call.
2365 @type ninfo: L{objects.Node}
2366 @param ninfo: the node to check
2367 @param nresult: the remote results for the node
2368 @param nimg: the node image object
2369 @param vg_name: the configured VG name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 nimg.lvm_fail = True
2376 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2379 elif isinstance(lvdata, basestring):
2380 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2381 utils.SafeEncode(lvdata))
2382 elif not isinstance(lvdata, dict):
2383 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2385 nimg.volumes = lvdata
2386 nimg.lvm_fail = False
2388 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2389 """Verifies and updates the node instance list.
2391 If the listing was successful, then updates this node's instance
2392 list. Otherwise, it marks the RPC call as failed for the instance
2395 @type ninfo: L{objects.Node}
2396 @param ninfo: the node to check
2397 @param nresult: the remote results for the node
2398 @param nimg: the node image object
2401 idata = nresult.get(constants.NV_INSTANCELIST, None)
2402 test = not isinstance(idata, list)
2403 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2404 " (instancelist): %s", utils.SafeEncode(str(idata)))
2406 nimg.hyp_fail = True
2408 nimg.instances = idata
2410 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2411 """Verifies and computes a node information map
2413 @type ninfo: L{objects.Node}
2414 @param ninfo: the node to check
2415 @param nresult: the remote results for the node
2416 @param nimg: the node image object
2417 @param vg_name: the configured VG name
2421 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423 # try to read free memory (from the hypervisor)
2424 hv_info = nresult.get(constants.NV_HVINFO, None)
2425 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2426 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2429 nimg.mfree = int(hv_info["memory_free"])
2430 except (ValueError, TypeError):
2431 _ErrorIf(True, self.ENODERPC, node,
2432 "node returned invalid nodeinfo, check hypervisor")
2434 # FIXME: devise a free space model for file based instances as well
2435 if vg_name is not None:
2436 test = (constants.NV_VGLIST not in nresult or
2437 vg_name not in nresult[constants.NV_VGLIST])
2438 _ErrorIf(test, self.ENODELVM, node,
2439 "node didn't return data for the volume group '%s'"
2440 " - it is either missing or broken", vg_name)
2443 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2444 except (ValueError, TypeError):
2445 _ErrorIf(True, self.ENODERPC, node,
2446 "node returned invalid LVM info, check LVM status")
2448 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2449 """Gets per-disk status information for all instances.
2451 @type nodelist: list of strings
2452 @param nodelist: Node names
2453 @type node_image: dict of (name, L{objects.Node})
2454 @param node_image: Node objects
2455 @type instanceinfo: dict of (name, L{objects.Instance})
2456 @param instanceinfo: Instance objects
2457 @rtype: {instance: {node: [(success, payload)]}}
2458 @return: a dictionary of per-instance dictionaries with nodes as
2459 keys and disk information as values; the disk information is a
2460 list of tuples (success, payload)
2463 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2466 node_disks_devonly = {}
2467 diskless_instances = set()
2468 diskless = constants.DT_DISKLESS
2470 for nname in nodelist:
2471 node_instances = list(itertools.chain(node_image[nname].pinst,
2472 node_image[nname].sinst))
2473 diskless_instances.update(inst for inst in node_instances
2474 if instanceinfo[inst].disk_template == diskless)
2475 disks = [(inst, disk)
2476 for inst in node_instances
2477 for disk in instanceinfo[inst].disks]
2480 # No need to collect data
2483 node_disks[nname] = disks
2485 # Creating copies as SetDiskID below will modify the objects and that can
2486 # lead to incorrect data returned from nodes
2487 devonly = [dev.Copy() for (_, dev) in disks]
2490 self.cfg.SetDiskID(dev, nname)
2492 node_disks_devonly[nname] = devonly
2494 assert len(node_disks) == len(node_disks_devonly)
2496 # Collect data from all nodes with disks
2497 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2500 assert len(result) == len(node_disks)
2504 for (nname, nres) in result.items():
2505 disks = node_disks[nname]
2508 # No data from this node
2509 data = len(disks) * [(False, "node offline")]
2512 _ErrorIf(msg, self.ENODERPC, nname,
2513 "while getting disk information: %s", msg)
2515 # No data from this node
2516 data = len(disks) * [(False, msg)]
2519 for idx, i in enumerate(nres.payload):
2520 if isinstance(i, (tuple, list)) and len(i) == 2:
2523 logging.warning("Invalid result from node %s, entry %d: %s",
2525 data.append((False, "Invalid result from the remote node"))
2527 for ((inst, _), status) in zip(disks, data):
2528 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2530 # Add empty entries for diskless instances.
2531 for inst in diskless_instances:
2532 assert inst not in instdisk
2535 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2536 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2537 compat.all(isinstance(s, (tuple, list)) and
2538 len(s) == 2 for s in statuses)
2539 for inst, nnames in instdisk.items()
2540 for nname, statuses in nnames.items())
2541 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
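# Illustrative note (shape implied by the assertions above): instdisk looks
# roughly like
#   {"inst1": {"node1": [(True, status0), (True, status1)]},
#    "inst2": {"node2": [(False, "node offline")]}}
# i.e. per instance, per node, one (success, payload) pair per disk.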
2545 def BuildHooksEnv(self):
2548 Cluster-Verify hooks are run only in the post phase; their failure causes
2549 the output to be logged in the verify output and the verification to fail.
2553 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2556 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2557 for node in self.my_node_info.values())
2561 def BuildHooksNodes(self):
2562 """Build hooks nodes.
2565 return ([], self.my_node_names)
2567 def Exec(self, feedback_fn):
2568 """Verify integrity of the node group, performing various test on nodes.
2571 # This method has too many local variables. pylint: disable=R0914
2572 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2574 if not self.my_node_names:
2576 feedback_fn("* Empty node group, skipping verification")
2580 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2581 verbose = self.op.verbose
2582 self._feedback_fn = feedback_fn
2584 vg_name = self.cfg.GetVGName()
2585 drbd_helper = self.cfg.GetDRBDHelper()
2586 cluster = self.cfg.GetClusterInfo()
2587 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2588 hypervisors = cluster.enabled_hypervisors
2589 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2591 i_non_redundant = [] # Non redundant instances
2592 i_non_a_balanced = [] # Non auto-balanced instances
2593 n_offline = 0 # Count of offline nodes
2594 n_drained = 0 # Count of nodes being drained
2595 node_vol_should = {}
2597 # FIXME: verify OS list
2600 filemap = _ComputeAncillaryFiles(cluster, False)
2602 # do local checksums
2603 master_node = self.master_node = self.cfg.GetMasterNode()
2604 master_ip = self.cfg.GetMasterIP()
2606 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2608 # We will make nodes contact all nodes in their group, and one node from
2609 # every other group.
2610 # TODO: should it be a *random* node, different every time?
2611 online_nodes = [node.name for node in node_data_list if not node.offline]
2612 other_group_nodes = {}
2614 for name in sorted(self.all_node_info):
2615 node = self.all_node_info[name]
2616 if (node.group not in other_group_nodes
2617 and node.group != self.group_uuid
2618 and not node.offline):
2619 other_group_nodes[node.group] = node.name
2621 node_verify_param = {
2622 constants.NV_FILELIST:
2623 utils.UniqueSequence(filename
2624 for files in filemap
2625 for filename in files),
2626 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2627 constants.NV_HYPERVISOR: hypervisors,
2628 constants.NV_HVPARAMS:
2629 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2630 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2631 for node in node_data_list
2632 if not node.offline],
2633 constants.NV_INSTANCELIST: hypervisors,
2634 constants.NV_VERSION: None,
2635 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2636 constants.NV_NODESETUP: None,
2637 constants.NV_TIME: None,
2638 constants.NV_MASTERIP: (master_node, master_ip),
2639 constants.NV_OSLIST: None,
2640 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2643 if vg_name is not None:
2644 node_verify_param[constants.NV_VGLIST] = None
2645 node_verify_param[constants.NV_LVLIST] = vg_name
2646 node_verify_param[constants.NV_PVLIST] = [vg_name]
2647 node_verify_param[constants.NV_DRBDLIST] = None
2650 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2653 # FIXME: this needs to be changed per node-group, not cluster-wide
2655 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2656 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2657 bridges.add(default_nicpp[constants.NIC_LINK])
2658 for instance in self.my_inst_info.values():
2659 for nic in instance.nics:
2660 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2661 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2662 bridges.add(full_nic[constants.NIC_LINK])
2665 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2667 # Build our expected cluster state
2668 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2670 vm_capable=node.vm_capable))
2671 for node in node_data_list)
2675 for node in self.all_node_info.values():
2676 path = _SupportsOob(self.cfg, node)
2677 if path and path not in oob_paths:
2678 oob_paths.append(path)
2681 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2683 for instance in self.my_inst_names:
2684 inst_config = self.my_inst_info[instance]
2686 for nname in inst_config.all_nodes:
2687 if nname not in node_image:
2688 gnode = self.NodeImage(name=nname)
2689 gnode.ghost = (nname not in self.all_node_info)
2690 node_image[nname] = gnode
2692 inst_config.MapLVsByNode(node_vol_should)
2694 pnode = inst_config.primary_node
2695 node_image[pnode].pinst.append(instance)
2697 for snode in inst_config.secondary_nodes:
2698 nimg = node_image[snode]
2699 nimg.sinst.append(instance)
2700 if pnode not in nimg.sbp:
2701 nimg.sbp[pnode] = []
2702 nimg.sbp[pnode].append(instance)
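# Illustrative note: nimg.sbp ("secondaries by primary") maps a primary node
# name to the instances for which the current node acts as secondary, e.g.
#   node_image["node2"].sbp == {"node1": ["inst1", "inst2"]}
# This is the structure _VerifyNPlusOneMemory iterates over later.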
2704 # At this point, we have the in-memory data structures complete,
2705 # except for the runtime information, which we'll gather next
2707 # Due to the way our RPC system works, exact response times cannot be
2708 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2709 # time before and after executing the request, we can at least have a time
2711 nvinfo_starttime = time.time()
2712 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2714 self.cfg.GetClusterName())
2715 nvinfo_endtime = time.time()
2717 if self.extra_lv_nodes and vg_name is not None:
2719 self.rpc.call_node_verify(self.extra_lv_nodes,
2720 {constants.NV_LVLIST: vg_name},
2721 self.cfg.GetClusterName())
2723 extra_lv_nvinfo = {}
2725 all_drbd_map = self.cfg.ComputeDRBDMap()
2727 feedback_fn("* Gathering disk information (%s nodes)" %
2728 len(self.my_node_names))
2729 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2732 feedback_fn("* Verifying configuration file consistency")
2734 # If not all nodes are being checked, we need to make sure the master node
2735 # and a non-checked vm_capable node are in the list.
2736 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2738 vf_nvinfo = all_nvinfo.copy()
2739 vf_node_info = list(self.my_node_info.values())
2740 additional_nodes = []
2741 if master_node not in self.my_node_info:
2742 additional_nodes.append(master_node)
2743 vf_node_info.append(self.all_node_info[master_node])
2744 # Add the first vm_capable node we find which is not included
2745 for node in absent_nodes:
2746 nodeinfo = self.all_node_info[node]
2747 if nodeinfo.vm_capable and not nodeinfo.offline:
2748 additional_nodes.append(node)
2749 vf_node_info.append(self.all_node_info[node])
2751 key = constants.NV_FILELIST
2752 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2753 {key: node_verify_param[key]},
2754 self.cfg.GetClusterName()))
2756 vf_nvinfo = all_nvinfo
2757 vf_node_info = self.my_node_info.values()
2759 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2761 feedback_fn("* Verifying node status")
2765 for node_i in node_data_list:
2767 nimg = node_image[node]
2771 feedback_fn("* Skipping offline node %s" % (node,))
2775 if node == master_node:
2777 elif node_i.master_candidate:
2778 ntype = "master candidate"
2779 elif node_i.drained:
2785 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2787 msg = all_nvinfo[node].fail_msg
2788 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2790 nimg.rpc_fail = True
2793 nresult = all_nvinfo[node].payload
2795 nimg.call_ok = self._VerifyNode(node_i, nresult)
2796 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2797 self._VerifyNodeNetwork(node_i, nresult)
2798 self._VerifyOob(node_i, nresult)
2801 self._VerifyNodeLVM(node_i, nresult, vg_name)
2802 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2805 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2806 self._UpdateNodeInstances(node_i, nresult, nimg)
2807 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2808 self._UpdateNodeOS(node_i, nresult, nimg)
2810 if not nimg.os_fail:
2811 if refos_img is None:
2813 self._VerifyNodeOS(node_i, nimg, refos_img)
2814 self._VerifyNodeBridges(node_i, nresult, bridges)
2816 # Check whether all running instances are primary for the node. (This
2817 # can no longer be done from _VerifyInstance below, since some of the
2818 # wrong instances could be from other node groups.)
2819 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2821 for inst in non_primary_inst:
2822 test = inst in self.all_inst_info
2823 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2824 "instance should not run on node %s", node_i.name)
2825 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2826 "node is running unknown instance %s", inst)
2828 for node, result in extra_lv_nvinfo.items():
2829 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2830 node_image[node], vg_name)
2832 feedback_fn("* Verifying instance status")
2833 for instance in self.my_inst_names:
2835 feedback_fn("* Verifying instance %s" % instance)
2836 inst_config = self.my_inst_info[instance]
2837 self._VerifyInstance(instance, inst_config, node_image,
2839 inst_nodes_offline = []
2841 pnode = inst_config.primary_node
2842 pnode_img = node_image[pnode]
2843 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2844 self.ENODERPC, pnode, "instance %s, connection to"
2845 " primary node failed", instance)
2847 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2848 self.EINSTANCEBADNODE, instance,
2849 "instance is marked as running and lives on offline node %s",
2850 inst_config.primary_node)
2852 # If the instance is non-redundant we cannot survive losing its primary
2853 # node, so we are not N+1 compliant. On the other hand we have no disk
2854 # templates with more than one secondary so that situation is not well
2856 # FIXME: does not support file-backed instances
2857 if not inst_config.secondary_nodes:
2858 i_non_redundant.append(instance)
2860 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2861 instance, "instance has multiple secondary nodes: %s",
2862 utils.CommaJoin(inst_config.secondary_nodes),
2863 code=self.ETYPE_WARNING)
2865 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2866 pnode = inst_config.primary_node
2867 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2868 instance_groups = {}
2870 for node in instance_nodes:
2871 instance_groups.setdefault(self.all_node_info[node].group,
2875 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2876 # Sort so that we always list the primary node first.
2877 for group, nodes in sorted(instance_groups.items(),
2878 key=lambda (_, nodes): pnode in nodes,
2881 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2882 instance, "instance has primary and secondary nodes in"
2883 " different groups: %s", utils.CommaJoin(pretty_list),
2884 code=self.ETYPE_WARNING)
2886 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2887 i_non_a_balanced.append(instance)
2889 for snode in inst_config.secondary_nodes:
2890 s_img = node_image[snode]
2891 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2892 "instance %s, connection to secondary node failed", instance)
2895 inst_nodes_offline.append(snode)
2897 # warn that the instance lives on offline nodes
2898 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2899 "instance has offline secondary node(s) %s",
2900 utils.CommaJoin(inst_nodes_offline))
2901 # ... or ghost/non-vm_capable nodes
2902 for node in inst_config.all_nodes:
2903 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2904 "instance lives on ghost node %s", node)
2905 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2906 instance, "instance lives on non-vm_capable node %s", node)
2908 feedback_fn("* Verifying orphan volumes")
2909 reserved = utils.FieldSet(*cluster.reserved_lvs)
2911 # We will get spurious "unknown volume" warnings if any node of this group
2912 # is secondary for an instance whose primary is in another group. To avoid
2913 # them, we find these instances and add their volumes to node_vol_should.
2914 for inst in self.all_inst_info.values():
2915 for secondary in inst.secondary_nodes:
2916 if (secondary in self.my_node_info
2917 and inst.name not in self.my_inst_info):
2918 inst.MapLVsByNode(node_vol_should)
2921 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2923 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2924 feedback_fn("* Verifying N+1 Memory redundancy")
2925 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2927 feedback_fn("* Other Notes")
2929 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2930 % len(i_non_redundant))
2932 if i_non_a_balanced:
2933 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2934 % len(i_non_a_balanced))
2937 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2940 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2944 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2945 """Analyze the post-hooks' result
2947 This method analyses the hook result, handles it, and sends some
2948 nicely-formatted feedback back to the user.
2950 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2951 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2952 @param hooks_results: the results of the multi-node hooks rpc call
2953 @param feedback_fn: function used to send feedback back to the caller
2954 @param lu_result: previous Exec result
2955 @return: the new Exec result, based on the previous result
2959 # We only really run POST phase hooks, only for non-empty groups,
2960 # and are only interested in their results
2961 if not self.my_node_names:
2964 elif phase == constants.HOOKS_PHASE_POST:
2965 # Used to change hooks' output to proper indentation
2966 feedback_fn("* Hooks Results")
2967 assert hooks_results, "invalid result from hooks"
2969 for node_name in hooks_results:
2970 res = hooks_results[node_name]
2972 test = msg and not res.offline
2973 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2974 "Communication failure in hooks execution: %s", msg)
2975 if res.offline or msg:
2976 # No need to investigate payload if node is offline or gave an error.
2977 # manually override lu_result here, as _ErrorIf only
2978 # overrides self.bad
2981 for script, hkr, output in res.payload:
2982 test = hkr == constants.HKR_FAIL
2983 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2984 "Script %s failed, output:", script)
2986 output = self._HOOKS_INDENT_RE.sub(" ", output)
2987 feedback_fn("%s" % output)
2993 class LUClusterVerifyDisks(NoHooksLU):
2994 """Verifies the cluster disks status.
2999 def ExpandNames(self):
3000 self.share_locks = _ShareAll()
3001 self.needed_locks = {
3002 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3005 def Exec(self, feedback_fn):
3006 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3008 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3009 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3010 for group in group_names])
3013 class LUGroupVerifyDisks(NoHooksLU):
3014 """Verifies the status of all disks in a node group.
3019 def ExpandNames(self):
3020 # Raises errors.OpPrereqError on its own if group can't be found
3021 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3023 self.share_locks = _ShareAll()
3024 self.needed_locks = {
3025 locking.LEVEL_INSTANCE: [],
3026 locking.LEVEL_NODEGROUP: [],
3027 locking.LEVEL_NODE: [],
3030 def DeclareLocks(self, level):
3031 if level == locking.LEVEL_INSTANCE:
3032 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3034 # Lock instances optimistically, needs verification once node and group
3035 # locks have been acquired
3036 self.needed_locks[locking.LEVEL_INSTANCE] = \
3037 self.cfg.GetNodeGroupInstances(self.group_uuid)
3039 elif level == locking.LEVEL_NODEGROUP:
3040 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3042 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3043 set([self.group_uuid] +
3044 # Lock all groups used by instances optimistically; this requires
3045 # going via the node before it's locked, requiring verification
3048 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3049 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3051 elif level == locking.LEVEL_NODE:
3052 # This will only lock the nodes in the group to be verified which contain
3054 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3055 self._LockInstancesNodes()
3057 # Lock all nodes in group to be verified
3058 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3059 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3060 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3062 def CheckPrereq(self):
3063 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3064 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3065 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3067 assert self.group_uuid in owned_groups
3069 # Check if locked instances are still correct
3070 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3072 # Get instance information
3073 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3075 # Check if node groups for locked instances are still correct
3076 for (instance_name, inst) in self.instances.items():
3077 assert owned_nodes.issuperset(inst.all_nodes), \
3078 "Instance %s's nodes changed while we kept the lock" % instance_name
3080 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3083 assert self.group_uuid in inst_groups, \
3084 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3086 def Exec(self, feedback_fn):
3087 """Verify integrity of cluster disks.
3089 @rtype: tuple of three items
3090 @return: a tuple of (dict of node-to-node_error, list of instances
3091 which need activate-disks, dict of instance: (node, volume) for
3096 res_instances = set()
3099 nv_dict = _MapInstanceDisksToNodes([inst
3100 for inst in self.instances.values()
3104 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3105 set(self.cfg.GetVmCapableNodeList()))
3107 node_lvs = self.rpc.call_lv_list(nodes, [])
3109 for (node, node_res) in node_lvs.items():
3110 if node_res.offline:
3113 msg = node_res.fail_msg
3115 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3116 res_nodes[node] = msg
3119 for lv_name, (_, _, lv_online) in node_res.payload.items():
3120 inst = nv_dict.pop((node, lv_name), None)
3121 if not (lv_online or inst is None):
3122 res_instances.add(inst)
3124 # any leftover items in nv_dict are missing LVs, let's arrange the data
3126 for key, inst in nv_dict.iteritems():
3127 res_missing.setdefault(inst, []).append(key)
3129 return (res_nodes, list(res_instances), res_missing)
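# Illustrative example of the return value (shape per the docstring above):
#   ({"node1": "rpc error ..."},           # node -> error message
#    ["inst1"],                            # instances needing activate-disks
#    {"inst2": [("node3", "lv_name")]})    # instance -> missing (node, volume)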
3132 class LUClusterRepairDiskSizes(NoHooksLU):
3133 """Verifies the cluster disks sizes.
3138 def ExpandNames(self):
3139 if self.op.instances:
3140 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3141 self.needed_locks = {
3142 locking.LEVEL_NODE: [],
3143 locking.LEVEL_INSTANCE: self.wanted_names,
3145 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3147 self.wanted_names = None
3148 self.needed_locks = {
3149 locking.LEVEL_NODE: locking.ALL_SET,
3150 locking.LEVEL_INSTANCE: locking.ALL_SET,
3152 self.share_locks = _ShareAll()
3154 def DeclareLocks(self, level):
3155 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3156 self._LockInstancesNodes(primary_only=True)
3158 def CheckPrereq(self):
3159 """Check prerequisites.
3161 This only checks the optional instance list against the existing names.
3164 if self.wanted_names is None:
3165 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3167 self.wanted_instances = \
3168 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3170 def _EnsureChildSizes(self, disk):
3171 """Ensure children of the disk have the needed disk size.
3173 This is valid mainly for DRBD8 and fixes an issue where the
3174 children have a smaller disk size than the parent.
3176 @param disk: an L{ganeti.objects.Disk} object
3179 if disk.dev_type == constants.LD_DRBD8:
3180 assert disk.children, "Empty children for DRBD8?"
3181 fchild = disk.children[0]
3182 mismatch = fchild.size < disk.size
3184 self.LogInfo("Child disk has size %d, parent %d, fixing",
3185 fchild.size, disk.size)
3186 fchild.size = disk.size
3188 # and we recurse on this child only, not on the metadev
3189 return self._EnsureChildSizes(fchild) or mismatch
3193 def Exec(self, feedback_fn):
3194 """Verify the size of cluster disks.
3197 # TODO: check child disks too
3198 # TODO: check differences in size between primary/secondary nodes
3200 for instance in self.wanted_instances:
3201 pnode = instance.primary_node
3202 if pnode not in per_node_disks:
3203 per_node_disks[pnode] = []
3204 for idx, disk in enumerate(instance.disks):
3205 per_node_disks[pnode].append((instance, idx, disk))
3208 for node, dskl in per_node_disks.items():
3209 newl = [v[2].Copy() for v in dskl]
3211 self.cfg.SetDiskID(dsk, node)
3212 result = self.rpc.call_blockdev_getsize(node, newl)
3214 self.LogWarning("Failure in blockdev_getsize call to node"
3215 " %s, ignoring", node)
3217 if len(result.payload) != len(dskl):
3218 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3219 " result.payload=%s", node, len(dskl), result.payload)
3220 self.LogWarning("Invalid result from node %s, ignoring node results",
3223 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3225 self.LogWarning("Disk %d of instance %s did not return size"
3226 " information, ignoring", idx, instance.name)
3228 if not isinstance(size, (int, long)):
3229 self.LogWarning("Disk %d of instance %s did not return valid"
3230 " size information, ignoring", idx, instance.name)
3233 if size != disk.size:
3234 self.LogInfo("Disk %d of instance %s has mismatched size,"
3235 " correcting: recorded %d, actual %d", idx,
3236 instance.name, disk.size, size)
3238 self.cfg.Update(instance, feedback_fn)
3239 changed.append((instance.name, idx, size))
3240 if self._EnsureChildSizes(disk):
3241 self.cfg.Update(instance, feedback_fn)
3242 changed.append((instance.name, idx, disk.size))
3246 class LUClusterRename(LogicalUnit):
3247 """Rename the cluster.
3250 HPATH = "cluster-rename"
3251 HTYPE = constants.HTYPE_CLUSTER
3253 def BuildHooksEnv(self):
3258 "OP_TARGET": self.cfg.GetClusterName(),
3259 "NEW_NAME": self.op.name,
3262 def BuildHooksNodes(self):
3263 """Build hooks nodes.
3266 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3268 def CheckPrereq(self):
3269 """Verify that the passed name is a valid one.
3272 hostname = netutils.GetHostname(name=self.op.name,
3273 family=self.cfg.GetPrimaryIPFamily())
3275 new_name = hostname.name
3276 self.ip = new_ip = hostname.ip
3277 old_name = self.cfg.GetClusterName()
3278 old_ip = self.cfg.GetMasterIP()
3279 if new_name == old_name and new_ip == old_ip:
3280 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3281 " cluster has changed",
3283 if new_ip != old_ip:
3284 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3285 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3286 " reachable on the network" %
3287 new_ip, errors.ECODE_NOTUNIQUE)
3289 self.op.name = new_name
3291 def Exec(self, feedback_fn):
3292 """Rename the cluster.
3295 clustername = self.op.name
3298 # shut down the master IP
3299 master = self.cfg.GetMasterNode()
3300 result = self.rpc.call_node_deactivate_master_ip(master)
3301 result.Raise("Could not disable the master role")
3304 cluster = self.cfg.GetClusterInfo()
3305 cluster.cluster_name = clustername
3306 cluster.master_ip = ip
3307 self.cfg.Update(cluster, feedback_fn)
3309 # update the known hosts file
3310 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3311 node_list = self.cfg.GetOnlineNodeList()
3313 node_list.remove(master)
3316 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3318 result = self.rpc.call_node_activate_master_ip(master)
3319 msg = result.fail_msg
3321 self.LogWarning("Could not re-enable the master role on"
3322 " the master, please restart manually: %s", msg)
3327 class LUClusterSetParams(LogicalUnit):
3328 """Change the parameters of the cluster.
3331 HPATH = "cluster-modify"
3332 HTYPE = constants.HTYPE_CLUSTER
3335 def CheckArguments(self):
3339 if self.op.uid_pool:
3340 uidpool.CheckUidPool(self.op.uid_pool)
3342 if self.op.add_uids:
3343 uidpool.CheckUidPool(self.op.add_uids)
3345 if self.op.remove_uids:
3346 uidpool.CheckUidPool(self.op.remove_uids)
3348 def ExpandNames(self):
3349 # FIXME: in the future maybe other cluster params won't require checking on
3350 # all nodes to be modified.
3351 self.needed_locks = {
3352 locking.LEVEL_NODE: locking.ALL_SET,
3354 self.share_locks[locking.LEVEL_NODE] = 1
3356 def BuildHooksEnv(self):
3361 "OP_TARGET": self.cfg.GetClusterName(),
3362 "NEW_VG_NAME": self.op.vg_name,
3365 def BuildHooksNodes(self):
3366 """Build hooks nodes.
3369 mn = self.cfg.GetMasterNode()
3372 def CheckPrereq(self):
3373 """Check prerequisites.
3375 This checks that the given parameters don't conflict and
3376 that the given volume group is valid.
3379 if self.op.vg_name is not None and not self.op.vg_name:
3380 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3381 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3382 " instances exist", errors.ECODE_INVAL)
3384 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3385 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3386 raise errors.OpPrereqError("Cannot disable drbd helper while"
3387 " drbd-based instances exist",
3390 node_list = self.owned_locks(locking.LEVEL_NODE)
3392 # if vg_name is not None, check the given volume group on all nodes
3394 vglist = self.rpc.call_vg_list(node_list)
3395 for node in node_list:
3396 msg = vglist[node].fail_msg
3398 # ignoring down node
3399 self.LogWarning("Error while gathering data on node %s"
3400 " (ignoring node): %s", node, msg)
3402 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3404 constants.MIN_VG_SIZE)
3406 raise errors.OpPrereqError("Error on node '%s': %s" %
3407 (node, vgstatus), errors.ECODE_ENVIRON)
3409 if self.op.drbd_helper:
3410 # checks given drbd helper on all nodes
3411 helpers = self.rpc.call_drbd_helper(node_list)
3412 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3414 self.LogInfo("Not checking drbd helper on offline node %s", node)
3416 msg = helpers[node].fail_msg
3418 raise errors.OpPrereqError("Error checking drbd helper on node"
3419 " '%s': %s" % (node, msg),
3420 errors.ECODE_ENVIRON)
3421 node_helper = helpers[node].payload
3422 if node_helper != self.op.drbd_helper:
3423 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3424 (node, node_helper), errors.ECODE_ENVIRON)
3426 self.cluster = cluster = self.cfg.GetClusterInfo()
3427 # validate params changes
3428 if self.op.beparams:
3429 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3430 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3432 if self.op.ndparams:
3433 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3434 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3436 # TODO: we need a more general way to handle resetting
3437 # cluster-level parameters to default values
3438 if self.new_ndparams["oob_program"] == "":
3439 self.new_ndparams["oob_program"] = \
3440 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3442 if self.op.nicparams:
3443 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3444 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3445 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3448 # check all instances for consistency
3449 for instance in self.cfg.GetAllInstancesInfo().values():
3450 for nic_idx, nic in enumerate(instance.nics):
3451 params_copy = copy.deepcopy(nic.nicparams)
3452 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3454 # check parameter syntax
3456 objects.NIC.CheckParameterSyntax(params_filled)
3457 except errors.ConfigurationError, err:
3458 nic_errors.append("Instance %s, nic/%d: %s" %
3459 (instance.name, nic_idx, err))
3461 # if we're moving instances to routed, check that they have an ip
3462 target_mode = params_filled[constants.NIC_MODE]
3463 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3464 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3465 " address" % (instance.name, nic_idx))
3467 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3468 "\n".join(nic_errors))
3470 # hypervisor list/parameters
3471 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3472 if self.op.hvparams:
3473 for hv_name, hv_dict in self.op.hvparams.items():
3474 if hv_name not in self.new_hvparams:
3475 self.new_hvparams[hv_name] = hv_dict
3477 self.new_hvparams[hv_name].update(hv_dict)
3479 # os hypervisor parameters
3480 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3482 for os_name, hvs in self.op.os_hvp.items():
3483 if os_name not in self.new_os_hvp:
3484 self.new_os_hvp[os_name] = hvs
3486 for hv_name, hv_dict in hvs.items():
3487 if hv_name not in self.new_os_hvp[os_name]:
3488 self.new_os_hvp[os_name][hv_name] = hv_dict
3490 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3493 self.new_osp = objects.FillDict(cluster.osparams, {})
3494 if self.op.osparams:
3495 for os_name, osp in self.op.osparams.items():
3496 if os_name not in self.new_osp:
3497 self.new_osp[os_name] = {}
3499 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3502 if not self.new_osp[os_name]:
3503 # we removed all parameters
3504 del self.new_osp[os_name]
3506 # check the parameter validity (remote check)
3507 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3508 os_name, self.new_osp[os_name])
3510 # changes to the hypervisor list
3511 if self.op.enabled_hypervisors is not None:
3512 self.hv_list = self.op.enabled_hypervisors
3513 for hv in self.hv_list:
3514 # if the hypervisor doesn't already exist in the cluster
3515 # hvparams, we initialize it to empty, and then (in both
3516 # cases) we make sure to fill the defaults, as we might not
3517 # have a complete defaults list if the hypervisor wasn't
3519 if hv not in new_hvp:
3521 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3522 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3524 self.hv_list = cluster.enabled_hypervisors
3526 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3527 # either the enabled list has changed, or the parameters have, validate
3528 for hv_name, hv_params in self.new_hvparams.items():
3529 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3530 (self.op.enabled_hypervisors and
3531 hv_name in self.op.enabled_hypervisors)):
3532 # either this is a new hypervisor, or its parameters have changed
3533 hv_class = hypervisor.GetHypervisor(hv_name)
3534 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3535 hv_class.CheckParameterSyntax(hv_params)
3536 _CheckHVParams(self, node_list, hv_name, hv_params)
3539 # no need to check any newly-enabled hypervisors, since the
3540 # defaults have already been checked in the above code-block
3541 for os_name, os_hvp in self.new_os_hvp.items():
3542 for hv_name, hv_params in os_hvp.items():
3543 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3544 # we need to fill in the new os_hvp on top of the actual hv_p
3545 cluster_defaults = self.new_hvparams.get(hv_name, {})
3546 new_osp = objects.FillDict(cluster_defaults, hv_params)
3547 hv_class = hypervisor.GetHypervisor(hv_name)
3548 hv_class.CheckParameterSyntax(new_osp)
3549 _CheckHVParams(self, node_list, hv_name, new_osp)
3551 if self.op.default_iallocator:
3552 alloc_script = utils.FindFile(self.op.default_iallocator,
3553 constants.IALLOCATOR_SEARCH_PATH,
3555 if alloc_script is None:
3556 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3557 " specified" % self.op.default_iallocator,
3560 def Exec(self, feedback_fn):
3561 """Change the parameters of the cluster.
3564 if self.op.vg_name is not None:
3565 new_volume = self.op.vg_name
3568 if new_volume != self.cfg.GetVGName():
3569 self.cfg.SetVGName(new_volume)
3571 feedback_fn("Cluster LVM configuration already in desired"
3572 " state, not changing")
3573 if self.op.drbd_helper is not None:
3574 new_helper = self.op.drbd_helper
3577 if new_helper != self.cfg.GetDRBDHelper():
3578 self.cfg.SetDRBDHelper(new_helper)
3580 feedback_fn("Cluster DRBD helper already in desired state,"
3582 if self.op.hvparams:
3583 self.cluster.hvparams = self.new_hvparams
3585 self.cluster.os_hvp = self.new_os_hvp
3586 if self.op.enabled_hypervisors is not None:
3587 self.cluster.hvparams = self.new_hvparams
3588 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3589 if self.op.beparams:
3590 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3591 if self.op.nicparams:
3592 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3593 if self.op.osparams:
3594 self.cluster.osparams = self.new_osp
3595 if self.op.ndparams:
3596 self.cluster.ndparams = self.new_ndparams
3598 if self.op.candidate_pool_size is not None:
3599 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3600 # we need to update the pool size here, otherwise the save will fail
3601 _AdjustCandidatePool(self, [])
3603 if self.op.maintain_node_health is not None:
3604 self.cluster.maintain_node_health = self.op.maintain_node_health
3606 if self.op.prealloc_wipe_disks is not None:
3607 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3609 if self.op.add_uids is not None:
3610 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3612 if self.op.remove_uids is not None:
3613 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3615 if self.op.uid_pool is not None:
3616 self.cluster.uid_pool = self.op.uid_pool
3618 if self.op.default_iallocator is not None:
3619 self.cluster.default_iallocator = self.op.default_iallocator
3621 if self.op.reserved_lvs is not None:
3622 self.cluster.reserved_lvs = self.op.reserved_lvs
3624 def helper_os(aname, mods, desc):
3626 lst = getattr(self.cluster, aname)
3627 for key, val in mods:
3628 if key == constants.DDM_ADD:
3630 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3633 elif key == constants.DDM_REMOVE:
3637 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3639 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3641 if self.op.hidden_os:
3642 helper_os("hidden_os", self.op.hidden_os, "hidden")
3644 if self.op.blacklisted_os:
3645 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3647 if self.op.master_netdev:
3648 master = self.cfg.GetMasterNode()
3649 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3650 self.cluster.master_netdev)
3651 result = self.rpc.call_node_deactivate_master_ip(master)
3652 result.Raise("Could not disable the master ip")
3653 feedback_fn("Changing master_netdev from %s to %s" %
3654 (self.cluster.master_netdev, self.op.master_netdev))
3655 self.cluster.master_netdev = self.op.master_netdev
3657 self.cfg.Update(self.cluster, feedback_fn)
3659 if self.op.master_netdev:
3660 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3661 self.op.master_netdev)
3662 result = self.rpc.call_node_activate_master_ip(master)
3664 self.LogWarning("Could not re-enable the master ip on"
3665 " the master, please restart manually: %s",
3669 def _UploadHelper(lu, nodes, fname):
3670 """Helper for uploading a file and showing warnings.
3673 if os.path.exists(fname):
3674 result = lu.rpc.call_upload_file(nodes, fname)
3675 for to_node, to_result in result.items():
3676 msg = to_result.fail_msg
3678 msg = ("Copy of file %s to node %s failed: %s" %
3679 (fname, to_node, msg))
3680 lu.proc.LogWarning(msg)
3683 def _ComputeAncillaryFiles(cluster, redist):
3684 """Compute files external to Ganeti which need to be consistent.
3686 @type redist: boolean
3687 @param redist: Whether to include files which need to be redistributed
3690 # Compute files for all nodes
3692 constants.SSH_KNOWN_HOSTS_FILE,
3693 constants.CONFD_HMAC_KEY,
3694 constants.CLUSTER_DOMAIN_SECRET_FILE,
3698 files_all.update(constants.ALL_CERT_FILES)
3699 files_all.update(ssconf.SimpleStore().GetFileList())
3701 if cluster.modify_etc_hosts:
3702 files_all.add(constants.ETC_HOSTS)
3704 # Files which must either exist on all nodes or on none
3705 files_all_opt = set([
3706 constants.RAPI_USERS_FILE,
3709 # Files which should only be on master candidates
3712 files_mc.add(constants.CLUSTER_CONF_FILE)
3714 # Files which should only be on VM-capable nodes
3715 files_vm = set(filename
3716 for hv_name in cluster.enabled_hypervisors
3717 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3719 # Filenames must be unique
3720 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3721 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3722 "Found file listed in more than one file list"
3724 return (files_all, files_all_opt, files_mc, files_vm)
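# Illustrative sketch (not part of the module's API): callers consume the
# tuple returned above roughly like this; the exact file constants in each
# set depend on the cluster configuration:
#
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist=True)
#   # files_all      -> distributed to every online node
#   # files_all_opt  -> must exist everywhere or nowhere
#   # files_mc       -> master candidates only (e.g. the cluster config)
#   # files_vm       -> VM-capable nodes only (hypervisor ancillary files)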
3727 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3728 """Distribute additional files which are part of the cluster configuration.
3730 ConfigWriter takes care of distributing the config and ssconf files, but
3731 there are more files which should be distributed to all nodes. This function
3732 makes sure those are copied.
3734 @param lu: calling logical unit
3735 @param additional_nodes: list of nodes not in the config to distribute to
3736 @type additional_vm: boolean
3737 @param additional_vm: whether the additional nodes are vm-capable or not
3740 # Gather target nodes
3741 cluster = lu.cfg.GetClusterInfo()
3742 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3744 online_nodes = lu.cfg.GetOnlineNodeList()
3745 vm_nodes = lu.cfg.GetVmCapableNodeList()
3747 if additional_nodes is not None:
3748 online_nodes.extend(additional_nodes)
3749 if additional_vm:
3750 vm_nodes.extend(additional_nodes)
3752 # Never distribute to master node
3753 for nodelist in [online_nodes, vm_nodes]:
3754 if master_info.name in nodelist:
3755 nodelist.remove(master_info.name)
3758 (files_all, files_all_opt, files_mc, files_vm) = \
3759 _ComputeAncillaryFiles(cluster, True)
3761 # Never re-distribute configuration file from here
3762 assert not (constants.CLUSTER_CONF_FILE in files_all or
3763 constants.CLUSTER_CONF_FILE in files_vm)
3764 assert not files_mc, "Master candidates not handled in this function"
3766 filemap = [
3767 (online_nodes, files_all),
3768 (online_nodes, files_all_opt),
3769 (vm_nodes, files_vm),
3770 ]
3773 for (node_list, files) in filemap:
3774 for fname in files:
3775 _UploadHelper(lu, node_list, fname)
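# Example invocation (as used later in this module by LUNodeAdd.Exec, where a
# freshly added node also needs the ancillary files):
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)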
3778 class LUClusterRedistConf(NoHooksLU):
3779 """Force the redistribution of cluster configuration.
3781 This is a very simple LU.
3786 def ExpandNames(self):
3787 self.needed_locks = {
3788 locking.LEVEL_NODE: locking.ALL_SET,
3789 }
3790 self.share_locks[locking.LEVEL_NODE] = 1
3792 def Exec(self, feedback_fn):
3793 """Redistribute the configuration.
3796 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3797 _RedistributeAncillaryFiles(self)
3800 class LUClusterActivateMasterIp(NoHooksLU):
3801 """Activate the master IP on the master node.
3804 def Exec(self, feedback_fn):
3805 """Activate the master IP.
3808 master = self.cfg.GetMasterNode()
3809 self.rpc.call_node_activate_master_ip(master)
3812 class LUClusterDeactivateMasterIp(NoHooksLU):
3813 """Deactivate the master IP on the master node.
3816 def Exec(self, feedback_fn):
3817 """Deactivate the master IP.
3820 master = self.cfg.GetMasterNode()
3821 self.rpc.call_node_deactivate_master_ip(master)
3824 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3825 """Sleep and poll for an instance's disk to sync.
3828 if not instance.disks or disks is not None and not disks:
3829 return True
3831 disks = _ExpandCheckDisks(instance, disks)
3833 if not oneshot:
3834 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3836 node = instance.primary_node
3838 for dev in disks:
3839 lu.cfg.SetDiskID(dev, node)
3841 # TODO: Convert to utils.Retry
3843 retries = 0
3844 degr_retries = 10 # in seconds, as we sleep 1 second each time
3845 while True:
3846 max_time = 0
3847 done = True
3848 cumul_degraded = False
3849 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3850 msg = rstats.fail_msg
3851 if msg:
3852 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3853 retries += 1
3854 if retries >= 10:
3855 raise errors.RemoteError("Can't contact node %s for mirror data,"
3856 " aborting." % node)
3857 time.sleep(6)
3858 continue
3859 rstats = rstats.payload
3861 for i, mstat in enumerate(rstats):
3862 if mstat is None:
3863 lu.LogWarning("Can't compute data for node %s/%s",
3864 node, disks[i].iv_name)
3865 continue
3867 cumul_degraded = (cumul_degraded or
3868 (mstat.is_degraded and mstat.sync_percent is None))
3869 if mstat.sync_percent is not None:
3870 done = False
3871 if mstat.estimated_time is not None:
3872 rem_time = ("%s remaining (estimated)" %
3873 utils.FormatSeconds(mstat.estimated_time))
3874 max_time = mstat.estimated_time
3876 rem_time = "no time estimate"
3877 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3878 (disks[i].iv_name, mstat.sync_percent, rem_time))
3880 # if we're done but degraded, let's do a few small retries, to
3881 # make sure we see a stable and not transient situation; therefore
3882 # we force restart of the loop
3883 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3884 logging.info("Degraded disks found, %d retries left", degr_retries)
3885 degr_retries -= 1
3886 time.sleep(1)
3887 continue
3889 if done or oneshot:
3890 break
3892 time.sleep(min(60, max_time))
3894 if done:
3895 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3896 return not cumul_degraded
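# Note on the loop above: degr_retries gives a short grace period (roughly ten
# one-second iterations) during which a "done but degraded" state is re-polled,
# so a transient degradation right after the sync finishes does not make the
# caller report a failure.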
3899 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3900 """Check that mirrors are not degraded.
3902 The ldisk parameter, if True, will change the test from the
3903 is_degraded attribute (which represents overall non-ok status for
3904 the device(s)) to the ldisk (representing the local storage status).
3907 lu.cfg.SetDiskID(dev, node)
3909 result = True
3911 if on_primary or dev.AssembleOnSecondary():
3912 rstats = lu.rpc.call_blockdev_find(node, dev)
3913 msg = rstats.fail_msg
3914 if msg:
3915 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3916 result = False
3917 elif not rstats.payload:
3918 lu.LogWarning("Can't find disk on node %s", node)
3919 result = False
3920 else:
3921 if ldisk:
3922 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3923 else:
3924 result = result and not rstats.payload.is_degraded
3926 if dev.children:
3927 for child in dev.children:
3928 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3930 return result
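# Typical use (illustrative sketch only): callers pass ldisk=True when only
# the local storage status matters, e.g.
#
#   if not _CheckDiskConsistency(self, dev, node, on_primary, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded" % dev.iv_name)
#
# The exact caller-side handling varies per logical unit.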
3933 class LUOobCommand(NoHooksLU):
3934 """Logical unit for OOB handling.
3938 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3940 def ExpandNames(self):
3941 """Gather locks we need.
3944 if self.op.node_names:
3945 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3946 lock_names = self.op.node_names
3947 else:
3948 lock_names = locking.ALL_SET
3950 self.needed_locks = {
3951 locking.LEVEL_NODE: lock_names,
3952 }
3954 def CheckPrereq(self):
3955 """Check prerequisites.
3958 - the node exists in the configuration
3961 Any errors are signaled by raising errors.OpPrereqError.
3965 self.master_node = self.cfg.GetMasterNode()
3967 assert self.op.power_delay >= 0.0
3969 if self.op.node_names:
3970 if (self.op.command in self._SKIP_MASTER and
3971 self.master_node in self.op.node_names):
3972 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3973 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3975 if master_oob_handler:
3976 additional_text = ("run '%s %s %s' if you want to operate on the"
3977 " master regardless") % (master_oob_handler,
3978 self.op.command,
3979 self.master_node)
3980 else:
3981 additional_text = "it does not support out-of-band operations"
3983 raise errors.OpPrereqError(("Operating on the master node %s is not"
3984 " allowed for %s; %s") %
3985 (self.master_node, self.op.command,
3986 additional_text), errors.ECODE_INVAL)
3987 else:
3988 self.op.node_names = self.cfg.GetNodeList()
3989 if self.op.command in self._SKIP_MASTER:
3990 self.op.node_names.remove(self.master_node)
3992 if self.op.command in self._SKIP_MASTER:
3993 assert self.master_node not in self.op.node_names
3995 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3996 if node is None:
3997 raise errors.OpPrereqError("Node %s not found" % node_name,
3998 errors.ECODE_NOENT)
3999 else:
4000 self.nodes.append(node)
4002 if (not self.op.ignore_status and
4003 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4004 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4005 " not marked offline") % node_name,
4006 errors.ECODE_STATE)
4008 def Exec(self, feedback_fn):
4009 """Execute OOB and return result if we expect any.
4012 master_node = self.master_node
4013 ret = []
4015 for idx, node in enumerate(utils.NiceSort(self.nodes,
4016 key=lambda node: node.name)):
4017 node_entry = [(constants.RS_NORMAL, node.name)]
4018 ret.append(node_entry)
4020 oob_program = _SupportsOob(self.cfg, node)
4022 if oob_program is None:
4023 node_entry.append((constants.RS_UNAVAIL, None))
4024 continue
4026 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4027 self.op.command, oob_program, node.name)
4028 result = self.rpc.call_run_oob(master_node, oob_program,
4029 self.op.command, node.name,
4030 self.op.timeout)
4032 if result.fail_msg:
4033 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4034 node.name, result.fail_msg)
4035 node_entry.append((constants.RS_NODATA, None))
4036 else:
4037 try:
4038 self._CheckPayload(result)
4039 except errors.OpExecError, err:
4040 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4041 node.name, err)
4042 node_entry.append((constants.RS_NODATA, None))
4043 else:
4044 if self.op.command == constants.OOB_HEALTH:
4045 # For health we should log important events
4046 for item, status in result.payload:
4047 if status in [constants.OOB_STATUS_WARNING,
4048 constants.OOB_STATUS_CRITICAL]:
4049 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4050 item, node.name, status)
4052 if self.op.command == constants.OOB_POWER_ON:
4053 node.powered = True
4054 elif self.op.command == constants.OOB_POWER_OFF:
4055 node.powered = False
4056 elif self.op.command == constants.OOB_POWER_STATUS:
4057 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4058 if powered != node.powered:
4059 logging.warning(("Recorded power state (%s) of node '%s' does not"
4060 " match actual power state (%s)"), node.powered,
4061 node.name, powered)
4063 # For configuration changing commands we should update the node
4064 if self.op.command in (constants.OOB_POWER_ON,
4065 constants.OOB_POWER_OFF):
4066 self.cfg.Update(node, feedback_fn)
4068 node_entry.append((constants.RS_NORMAL, result.payload))
4070 if (self.op.command == constants.OOB_POWER_ON and
4071 idx < len(self.nodes) - 1):
4072 time.sleep(self.op.power_delay)
4076 def _CheckPayload(self, result):
4077 """Checks if the payload is valid.
4079 @param result: RPC result
4080 @raises errors.OpExecError: If payload is not valid
4082 """
4083 errs = []
4084 if self.op.command == constants.OOB_HEALTH:
4085 if not isinstance(result.payload, list):
4086 errs.append("command 'health' is expected to return a list but got %s" %
4087 type(result.payload))
4088 else:
4089 for item, status in result.payload:
4090 if status not in constants.OOB_STATUSES:
4091 errs.append("health item '%s' has invalid status '%s'" %
4092 (item, status))
4094 if self.op.command == constants.OOB_POWER_STATUS:
4095 if not isinstance(result.payload, dict):
4096 errs.append("power-status is expected to return a dict but got %s" %
4097 type(result.payload))
4099 if self.op.command in [
4100 constants.OOB_POWER_ON,
4101 constants.OOB_POWER_OFF,
4102 constants.OOB_POWER_CYCLE,
4103 ]:
4104 if result.payload is not None:
4105 errs.append("%s is expected to not return payload but got '%s'" %
4106 (self.op.command, result.payload))
4108 if errs:
4109 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4110 utils.CommaJoin(errs))
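# Sketch of the data returned by LUOobCommand.Exec (one list per node, built
# above as node_entry): the first element names the node, the following ones
# carry the command's status and payload, e.g.
#
#   [[(constants.RS_NORMAL, "node1.example.com"),
#     (constants.RS_NORMAL, {"powered": True})],
#    [(constants.RS_NORMAL, "node2.example.com"),
#     (constants.RS_UNAVAIL, None)]]
#
# The payload shown for power-status is only illustrative; its exact keys come
# from the out-of-band helper program.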
4113 class _OsQuery(_QueryBase):
4114 FIELDS = query.OS_FIELDS
4116 def ExpandNames(self, lu):
4117 # Lock all nodes in shared mode
4118 # Temporary removal of locks, should be reverted later
4119 # TODO: reintroduce locks when they are lighter-weight
4120 lu.needed_locks = {}
4121 #self.share_locks[locking.LEVEL_NODE] = 1
4122 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4124 # The following variables interact with _QueryBase._GetNames
4125 if self.names:
4126 self.wanted = self.names
4127 else:
4128 self.wanted = locking.ALL_SET
4130 self.do_locking = self.use_locking
4132 def DeclareLocks(self, lu, level):
4133 pass
4135 @staticmethod
4136 def _DiagnoseByOS(rlist):
4137 """Remaps a per-node return list into a per-os per-node dictionary
4139 @param rlist: a map with node names as keys and OS objects as values
4142 @return: a dictionary with osnames as keys and as value another
4143 map, with nodes as keys and tuples of (path, status, diagnose,
4144 variants, parameters, api_versions) as values, eg::
4146 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4147 (/srv/..., False, "invalid api")],
4148 "node2": [(/srv/..., True, "", [], [])]}
4151 """
4152 all_os = {}
4153 # we build here the list of nodes that didn't fail the RPC (at RPC
4154 # level), so that nodes with a non-responding node daemon don't
4155 # make all OSes invalid
4156 good_nodes = [node_name for node_name in rlist
4157 if not rlist[node_name].fail_msg]
4158 for node_name, nr in rlist.items():
4159 if nr.fail_msg or not nr.payload:
4160 continue
4161 for (name, path, status, diagnose, variants,
4162 params, api_versions) in nr.payload:
4163 if name not in all_os:
4164 # build a list of nodes for this os containing empty lists
4165 # for each node in node_list
4166 all_os[name] = {}
4167 for nname in good_nodes:
4168 all_os[name][nname] = []
4169 # convert params from [name, help] to (name, help)
4170 params = [tuple(v) for v in params]
4171 all_os[name][node_name].append((path, status, diagnose,
4172 variants, params, api_versions))
4174 return all_os
4175 def _GetQueryData(self, lu):
4176 """Computes the list of nodes and their attributes.
4179 # Locking is not used
4180 assert not (compat.any(lu.glm.is_owned(level)
4181 for level in locking.LEVELS
4182 if level != locking.LEVEL_CLUSTER) or
4183 self.do_locking or self.use_locking)
4185 valid_nodes = [node.name
4186 for node in lu.cfg.GetAllNodesInfo().values()
4187 if not node.offline and node.vm_capable]
4188 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4189 cluster = lu.cfg.GetClusterInfo()
4191 data = {}
4193 for (os_name, os_data) in pol.items():
4194 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4195 hidden=(os_name in cluster.hidden_os),
4196 blacklisted=(os_name in cluster.blacklisted_os))
4198 variants = set()
4199 parameters = set()
4200 api_versions = set()
4202 for idx, osl in enumerate(os_data.values()):
4203 info.valid = bool(info.valid and osl and osl[0][1])
4204 if not info.valid:
4205 break
4207 (node_variants, node_params, node_api) = osl[0][3:6]
4208 if idx == 0:
4209 # First entry
4210 variants.update(node_variants)
4211 parameters.update(node_params)
4212 api_versions.update(node_api)
4213 else:
4214 # Filter out inconsistent values
4215 variants.intersection_update(node_variants)
4216 parameters.intersection_update(node_params)
4217 api_versions.intersection_update(node_api)
4219 info.variants = list(variants)
4220 info.parameters = list(parameters)
4221 info.api_versions = list(api_versions)
4223 data[os_name] = info
4225 # Prepare data in requested order
4226 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4227 if name in data]
4230 class LUOsDiagnose(NoHooksLU):
4231 """Logical unit for OS diagnose/query.
4237 def _BuildFilter(fields, names):
4238 """Builds a filter for querying OSes.
4241 name_filter = qlang.MakeSimpleFilter("name", names)
4243 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4244 # respective field is not requested
4245 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4246 for fname in ["hidden", "blacklisted"]
4247 if fname not in fields]
4248 if "valid" not in fields:
4249 status_filter.append([qlang.OP_TRUE, "valid"])
4251 if status_filter:
4252 status_filter.insert(0, qlang.OP_AND)
4253 else:
4254 status_filter = None
4256 if name_filter and status_filter:
4257 return [qlang.OP_AND, name_filter, status_filter]
4258 elif name_filter:
4259 return name_filter
4260 else:
4261 return status_filter
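# Illustrative example (the field and OS names are assumptions): requesting
# only the "name" field for one OS yields a filter of roughly this shape,
# hiding hidden, blacklisted and invalid OSes because those fields were not
# requested; the exact name-filter term comes from qlang.MakeSimpleFilter:
#
#   [qlang.OP_AND,
#    <name filter built from qlang.MakeSimpleFilter("name", ["debian-8"])>,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]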
4263 def CheckArguments(self):
4264 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4265 self.op.output_fields, False)
4267 def ExpandNames(self):
4268 self.oq.ExpandNames(self)
4270 def Exec(self, feedback_fn):
4271 return self.oq.OldStyleQuery(self)
4274 class LUNodeRemove(LogicalUnit):
4275 """Logical unit for removing a node.
4278 HPATH = "node-remove"
4279 HTYPE = constants.HTYPE_NODE
4281 def BuildHooksEnv(self):
4284 This doesn't run on the target node in the pre phase as a failed
4285 node would then be impossible to remove.
4288 return {
4289 "OP_TARGET": self.op.node_name,
4290 "NODE_NAME": self.op.node_name,
4291 }
4293 def BuildHooksNodes(self):
4294 """Build hooks nodes.
4297 all_nodes = self.cfg.GetNodeList()
4298 try:
4299 all_nodes.remove(self.op.node_name)
4300 except ValueError:
4301 logging.warning("Node '%s', which is about to be removed, was not found"
4302 " in the list of all nodes", self.op.node_name)
4303 return (all_nodes, all_nodes)
4305 def CheckPrereq(self):
4306 """Check prerequisites.
4309 - the node exists in the configuration
4310 - it does not have primary or secondary instances
4311 - it's not the master
4313 Any errors are signaled by raising errors.OpPrereqError.
4316 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4317 node = self.cfg.GetNodeInfo(self.op.node_name)
4318 assert node is not None
4320 masternode = self.cfg.GetMasterNode()
4321 if node.name == masternode:
4322 raise errors.OpPrereqError("Node is the master node, failover to another"
4323 " node is required", errors.ECODE_INVAL)
4325 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4326 if node.name in instance.all_nodes:
4327 raise errors.OpPrereqError("Instance %s is still running on the node,"
4328 " please remove first" % instance_name,
4329 errors.ECODE_INVAL)
4330 self.op.node_name = node.name
4331 self.node = node
4333 def Exec(self, feedback_fn):
4334 """Removes the node from the cluster.
4337 node = self.node
4338 logging.info("Stopping the node daemon and removing configs from node %s",
4339 node.name)
4341 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4343 # Promote nodes to master candidate as needed
4344 _AdjustCandidatePool(self, exceptions=[node.name])
4345 self.context.RemoveNode(node.name)
4347 # Run post hooks on the node before it's removed
4348 _RunPostHook(self, node.name)
4350 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4351 msg = result.fail_msg
4352 if msg:
4353 self.LogWarning("Errors encountered on the remote node while leaving"
4354 " the cluster: %s", msg)
4354 " the cluster: %s", msg)
4356 # Remove node from our /etc/hosts
4357 if self.cfg.GetClusterInfo().modify_etc_hosts:
4358 master_node = self.cfg.GetMasterNode()
4359 result = self.rpc.call_etc_hosts_modify(master_node,
4360 constants.ETC_HOSTS_REMOVE,
4361 node.name, None)
4362 result.Raise("Can't update hosts file with new host data")
4363 _RedistributeAncillaryFiles(self)
4366 class _NodeQuery(_QueryBase):
4367 FIELDS = query.NODE_FIELDS
4369 def ExpandNames(self, lu):
4370 lu.needed_locks = {}
4371 lu.share_locks = _ShareAll()
4373 if self.names:
4374 self.wanted = _GetWantedNodes(lu, self.names)
4375 else:
4376 self.wanted = locking.ALL_SET
4378 self.do_locking = (self.use_locking and
4379 query.NQ_LIVE in self.requested_data)
4381 if self.do_locking:
4382 # If any non-static field is requested we need to lock the nodes
4383 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4385 def DeclareLocks(self, lu, level):
4386 pass
4388 def _GetQueryData(self, lu):
4389 """Computes the list of nodes and their attributes.
4392 all_info = lu.cfg.GetAllNodesInfo()
4394 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4396 # Gather data as requested
4397 if query.NQ_LIVE in self.requested_data:
4398 # filter out non-vm_capable nodes
4399 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4401 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4402 lu.cfg.GetHypervisorType())
4403 live_data = dict((name, nresult.payload)
4404 for (name, nresult) in node_data.items()
4405 if not nresult.fail_msg and nresult.payload)
4407 else:
4408 live_data = None
4409 if query.NQ_INST in self.requested_data:
4410 node_to_primary = dict([(name, set()) for name in nodenames])
4411 node_to_secondary = dict([(name, set()) for name in nodenames])
4413 inst_data = lu.cfg.GetAllInstancesInfo()
4415 for inst in inst_data.values():
4416 if inst.primary_node in node_to_primary:
4417 node_to_primary[inst.primary_node].add(inst.name)
4418 for secnode in inst.secondary_nodes:
4419 if secnode in node_to_secondary:
4420 node_to_secondary[secnode].add(inst.name)
4421 else:
4422 node_to_primary = None
4423 node_to_secondary = None
4425 if query.NQ_OOB in self.requested_data:
4426 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4427 for name, node in all_info.iteritems())
4429 else:
4430 oob_support = None
4431 if query.NQ_GROUP in self.requested_data:
4432 groups = lu.cfg.GetAllNodeGroupsInfo()
4433 else:
4434 groups = {}
4436 return query.NodeQueryData([all_info[name] for name in nodenames],
4437 live_data, lu.cfg.GetMasterNode(),
4438 node_to_primary, node_to_secondary, groups,
4439 oob_support, lu.cfg.GetClusterInfo())
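# Note: live data is gathered only from vm_capable nodes, and a node whose RPC
# fails simply does not appear in live_data; the dynamic fields for such a
# node are then presumably reported as unavailable by the query layer rather
# than failing the whole query.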
4442 class LUNodeQuery(NoHooksLU):
4443 """Logical unit for querying nodes.
4446 # pylint: disable=W0142
4449 def CheckArguments(self):
4450 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4451 self.op.output_fields, self.op.use_locking)
4453 def ExpandNames(self):
4454 self.nq.ExpandNames(self)
4456 def Exec(self, feedback_fn):
4457 return self.nq.OldStyleQuery(self)
4460 class LUNodeQueryvols(NoHooksLU):
4461 """Logical unit for getting volumes on node(s).
4465 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4466 _FIELDS_STATIC = utils.FieldSet("node")
4468 def CheckArguments(self):
4469 _CheckOutputFields(static=self._FIELDS_STATIC,
4470 dynamic=self._FIELDS_DYNAMIC,
4471 selected=self.op.output_fields)
4473 def ExpandNames(self):
4474 self.needed_locks = {}
4475 self.share_locks[locking.LEVEL_NODE] = 1
4476 if not self.op.nodes:
4477 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4479 self.needed_locks[locking.LEVEL_NODE] = \
4480 _GetWantedNodes(self, self.op.nodes)
4482 def Exec(self, feedback_fn):
4483 """Computes the list of nodes and their attributes.
4486 nodenames = self.owned_locks(locking.LEVEL_NODE)
4487 volumes = self.rpc.call_node_volumes(nodenames)
4489 ilist = self.cfg.GetAllInstancesInfo()
4490 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4492 output = []
4493 for node in nodenames:
4494 nresult = volumes[node]
4495 if nresult.offline:
4496 continue
4497 msg = nresult.fail_msg
4498 if msg:
4499 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4500 continue
4502 node_vols = sorted(nresult.payload,
4503 key=operator.itemgetter("dev"))
4505 for vol in node_vols:
4506 node_output = []
4507 for field in self.op.output_fields:
4508 if field == "node":
4509 val = node
4510 elif field == "phys":
4511 val = vol["dev"]
4512 elif field == "vg":
4513 val = vol["vg"]
4514 elif field == "name":
4515 val = vol["name"]
4516 elif field == "size":
4517 val = int(float(vol["size"]))
4518 elif field == "instance":
4519 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4520 else:
4521 raise errors.ParameterError(field)
4522 node_output.append(str(val))
4524 output.append(node_output)
4526 return output
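# Illustrative result row (all values are made up): with output_fields
# ["node", "phys", "vg", "name", "size", "instance"] each entry of the
# returned list would look roughly like
#
#   ["node1.example.com", "/dev/sda5", "xenvg", "instance1.disk0", "10240",
#    "instance1.example.com"]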
4529 class LUNodeQueryStorage(NoHooksLU):
4530 """Logical unit for getting information on storage units on node(s).
4533 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4536 def CheckArguments(self):
4537 _CheckOutputFields(static=self._FIELDS_STATIC,
4538 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4539 selected=self.op.output_fields)
4541 def ExpandNames(self):
4542 self.needed_locks = {}
4543 self.share_locks[locking.LEVEL_NODE] = 1
4545 if self.op.nodes:
4546 self.needed_locks[locking.LEVEL_NODE] = \
4547 _GetWantedNodes(self, self.op.nodes)
4548 else:
4549 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4551 def Exec(self, feedback_fn):
4552 """Computes the list of nodes and their attributes.
4555 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4557 # Always get name to sort by
4558 if constants.SF_NAME in self.op.output_fields:
4559 fields = self.op.output_fields[:]
4560 else:
4561 fields = [constants.SF_NAME] + self.op.output_fields
4563 # Never ask for node or type as it's only known to the LU
4564 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4565 while extra in fields:
4566 fields.remove(extra)
4568 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4569 name_idx = field_idx[constants.SF_NAME]
4571 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4572 data = self.rpc.call_storage_list(self.nodes,
4573 self.op.storage_type, st_args,
4574 self.op.name, fields)
4576 result = []
4578 for node in utils.NiceSort(self.nodes):
4579 nresult = data[node]
4580 if nresult.offline:
4581 continue
4583 msg = nresult.fail_msg
4584 if msg:
4585 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4586 continue
4588 rows = dict([(row[name_idx], row) for row in nresult.payload])
4590 for name in utils.NiceSort(rows.keys()):
4591 row = rows[name]
4593 out = []
4595 for field in self.op.output_fields:
4596 if field == constants.SF_NODE:
4597 val = node
4598 elif field == constants.SF_TYPE:
4599 val = self.op.storage_type
4600 elif field in field_idx:
4601 val = row[field_idx[field]]
4602 else:
4603 raise errors.ParameterError(field)
4605 out.append(val)
4607 result.append(out)
4609 return result
4612 class _InstanceQuery(_QueryBase):
4613 FIELDS = query.INSTANCE_FIELDS
4615 def ExpandNames(self, lu):
4616 lu.needed_locks = {}
4617 lu.share_locks = _ShareAll()
4619 if self.names:
4620 self.wanted = _GetWantedInstances(lu, self.names)
4621 else:
4622 self.wanted = locking.ALL_SET
4624 self.do_locking = (self.use_locking and
4625 query.IQ_LIVE in self.requested_data)
4627 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4628 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4629 lu.needed_locks[locking.LEVEL_NODE] = []
4630 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4632 self.do_grouplocks = (self.do_locking and
4633 query.IQ_NODES in self.requested_data)
4635 def DeclareLocks(self, lu, level):
4637 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4638 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4640 # Lock all groups used by instances optimistically; this requires going
4641 # via the node before it's locked, requiring verification later on
4642 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4643 set(group_uuid
4644 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4645 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4645 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4646 elif level == locking.LEVEL_NODE:
4647 lu._LockInstancesNodes() # pylint: disable=W0212
4649 @staticmethod
4650 def _CheckGroupLocks(lu):
4651 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4652 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4654 # Check if node groups for locked instances are still correct
4655 for instance_name in owned_instances:
4656 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4658 def _GetQueryData(self, lu):
4659 """Computes the list of instances and their attributes.
4662 if self.do_grouplocks:
4663 self._CheckGroupLocks(lu)
4665 cluster = lu.cfg.GetClusterInfo()
4666 all_info = lu.cfg.GetAllInstancesInfo()
4668 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4670 instance_list = [all_info[name] for name in instance_names]
4671 nodes = frozenset(itertools.chain(*(inst.all_nodes
4672 for inst in instance_list)))
4673 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4674 bad_nodes = []
4675 offline_nodes = []
4676 wrongnode_inst = set()
4678 # Gather data as requested
4679 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4680 live_data = {}
4681 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4682 for name in nodes:
4683 result = node_data[name]
4684 if result.offline:
4685 # offline nodes will be in both lists
4686 assert result.fail_msg
4687 offline_nodes.append(name)
4688 if result.fail_msg:
4689 bad_nodes.append(name)
4690 elif result.payload:
4691 for inst in result.payload:
4692 if inst in all_info:
4693 if all_info[inst].primary_node == name:
4694 live_data.update(result.payload)
4695 else:
4696 wrongnode_inst.add(inst)
4697 else:
4698 # orphan instance; we don't list it here as we don't
4699 # handle this case yet in the output of instance listing
4700 logging.warning("Orphan instance '%s' found on node %s",
4701 inst, name)
4702 # else no instance is alive
4703 else:
4704 live_data = {}
4706 if query.IQ_DISKUSAGE in self.requested_data:
4707 disk_usage = dict((inst.name,
4708 _ComputeDiskSize(inst.disk_template,
4709 [{constants.IDISK_SIZE: disk.size}
4710 for disk in inst.disks]))
4711 for inst in instance_list)
4713 else:
4714 disk_usage = None
4715 if query.IQ_CONSOLE in self.requested_data:
4716 consinfo = {}
4717 for inst in instance_list:
4718 if inst.name in live_data:
4719 # Instance is running
4720 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4721 else:
4722 consinfo[inst.name] = None
4723 assert set(consinfo.keys()) == set(instance_names)
4725 else:
4726 consinfo = None
4727 if query.IQ_NODES in self.requested_data:
4728 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4729 instance_list)))
4730 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4731 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4732 for uuid in set(map(operator.attrgetter("group"),
4733 nodes.values())))
4735 else:
4736 nodes = None
4737 groups = None
4738 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4739 disk_usage, offline_nodes, bad_nodes,
4740 live_data, wrongnode_inst, consinfo,
4741 nodes, groups)
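# The assembled InstanceQueryData bundles static configuration (instance_list,
# cluster), the liveness information gathered above (live_data, offline_nodes,
# bad_nodes, wrongnode_inst), and the optional per-request extras (disk_usage,
# consinfo, nodes/groups), which stay None when the corresponding IQ_* data
# was not requested.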
4744 class LUQuery(NoHooksLU):
4745 """Query for resources/items of a certain kind.
4748 # pylint: disable=W0142
4751 def CheckArguments(self):
4752 qcls = _GetQueryImplementation(self.op.what)
4754 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4756 def ExpandNames(self):
4757 self.impl.ExpandNames(self)
4759 def DeclareLocks(self, level):
4760 self.impl.DeclareLocks(self, level)
4762 def Exec(self, feedback_fn):
4763 return self.impl.NewStyleQuery(self)
4766 class LUQueryFields(NoHooksLU):
4767 """Query for resources/items of a certain kind.
4770 # pylint: disable=W0142
4773 def CheckArguments(self):
4774 self.qcls = _GetQueryImplementation(self.op.what)
4776 def ExpandNames(self):
4777 self.needed_locks = {}
4779 def Exec(self, feedback_fn):
4780 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4783 class LUNodeModifyStorage(NoHooksLU):
4784 """Logical unit for modifying a storage volume on a node.
4789 def CheckArguments(self):
4790 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4792 storage_type = self.op.storage_type
4794 try:
4795 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4796 except KeyError:
4797 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4798 " modified" % storage_type,
4799 errors.ECODE_INVAL)
4801 diff = set(self.op.changes.keys()) - modifiable
4802 if diff:
4803 raise errors.OpPrereqError("The following fields can not be modified for"
4804 " storage units of type '%s': %r" %
4805 (storage_type, list(diff)),
4806 errors.ECODE_INVAL)
4808 def ExpandNames(self):
4809 self.needed_locks = {
4810 locking.LEVEL_NODE: self.op.node_name,
4811 }
4813 def Exec(self, feedback_fn):
4814 """Computes the list of nodes and their attributes.
4817 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4818 result = self.rpc.call_storage_modify(self.op.node_name,
4819 self.op.storage_type, st_args,
4820 self.op.name, self.op.changes)
4821 result.Raise("Failed to modify storage unit '%s' on %s" %
4822 (self.op.name, self.op.node_name))
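# Hypothetical example of the opcode this LU processes (the storage type,
# device name and field shown here are assumptions for illustration and must
# be valid entries of constants.MODIFIABLE_STORAGE_FIELDS):
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda5",
#                                    changes={constants.SF_ALLOCATABLE: False})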
4825 class LUNodeAdd(LogicalUnit):
4826 """Logical unit for adding node to the cluster.
4829 HPATH = "node-add"
4830 HTYPE = constants.HTYPE_NODE
4831 _NFLAGS = ["master_capable", "vm_capable"]
4833 def CheckArguments(self):
4834 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4835 # validate/normalize the node name
4836 self.hostname = netutils.GetHostname(name=self.op.node_name,
4837 family=self.primary_ip_family)
4838 self.op.node_name = self.hostname.name
4840 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4841 raise errors.OpPrereqError("Cannot readd the master node",
4844 if self.op.readd and self.op.group:
4845 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4846 " being readded", errors.ECODE_INVAL)
4848 def BuildHooksEnv(self):
4851 This will run on all nodes before, and on all nodes + the new node after.
4854 return {
4855 "OP_TARGET": self.op.node_name,
4856 "NODE_NAME": self.op.node_name,
4857 "NODE_PIP": self.op.primary_ip,
4858 "NODE_SIP": self.op.secondary_ip,
4859 "MASTER_CAPABLE": str(self.op.master_capable),
4860 "VM_CAPABLE": str(self.op.vm_capable),
4861 }
4863 def BuildHooksNodes(self):
4864 """Build hooks nodes.
4867 # Exclude added node
4868 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4869 post_nodes = pre_nodes + [self.op.node_name, ]
4871 return (pre_nodes, post_nodes)
4873 def CheckPrereq(self):
4874 """Check prerequisites.
4877 - the new node is not already in the config
4879 - its parameters (single/dual homed) matches the cluster
4881 Any errors are signaled by raising errors.OpPrereqError.
4884 cfg = self.cfg
4885 hostname = self.hostname
4886 node = hostname.name
4887 primary_ip = self.op.primary_ip = hostname.ip
4888 if self.op.secondary_ip is None:
4889 if self.primary_ip_family == netutils.IP6Address.family:
4890 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4891 " IPv4 address must be given as secondary",
4892 errors.ECODE_INVAL)
4893 self.op.secondary_ip = primary_ip
4895 secondary_ip = self.op.secondary_ip
4896 if not netutils.IP4Address.IsValid(secondary_ip):
4897 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4898 " address" % secondary_ip, errors.ECODE_INVAL)
4900 node_list = cfg.GetNodeList()
4901 if not self.op.readd and node in node_list:
4902 raise errors.OpPrereqError("Node %s is already in the configuration" %
4903 node, errors.ECODE_EXISTS)
4904 elif self.op.readd and node not in node_list:
4905 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4906 errors.ECODE_NOENT)
4908 self.changed_primary_ip = False
4910 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4911 if self.op.readd and node == existing_node_name:
4912 if existing_node.secondary_ip != secondary_ip:
4913 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4914 " address configuration as before",
4915 errors.ECODE_INVAL)
4916 if existing_node.primary_ip != primary_ip:
4917 self.changed_primary_ip = True
4919 continue
4921 if (existing_node.primary_ip == primary_ip or
4922 existing_node.secondary_ip == primary_ip or
4923 existing_node.primary_ip == secondary_ip or
4924 existing_node.secondary_ip == secondary_ip):
4925 raise errors.OpPrereqError("New node ip address(es) conflict with"
4926 " existing node %s" % existing_node.name,
4927 errors.ECODE_NOTUNIQUE)
4929 # After this 'if' block, None is no longer a valid value for the
4930 # _capable op attributes
4931 if self.op.readd:
4932 old_node = self.cfg.GetNodeInfo(node)
4933 assert old_node is not None, "Can't retrieve locked node %s" % node
4934 for attr in self._NFLAGS:
4935 if getattr(self.op, attr) is None:
4936 setattr(self.op, attr, getattr(old_node, attr))
4937 else:
4938 for attr in self._NFLAGS:
4939 if getattr(self.op, attr) is None:
4940 setattr(self.op, attr, True)
4942 if self.op.readd and not self.op.vm_capable:
4943 pri, sec = cfg.GetNodeInstances(node)
4944 if pri or sec:
4945 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4946 " flag set to false, but it already holds"
4947 " instances" % node,
4948 errors.ECODE_STATE)
4950 # check that the type of the node (single versus dual homed) is the
4951 # same as for the master
4952 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4953 master_singlehomed = myself.secondary_ip == myself.primary_ip
4954 newbie_singlehomed = secondary_ip == primary_ip
4955 if master_singlehomed != newbie_singlehomed:
4956 if master_singlehomed:
4957 raise errors.OpPrereqError("The master has no secondary ip but the"
4958 " new node has one",
4959 errors.ECODE_INVAL)
4960 else:
4961 raise errors.OpPrereqError("The master has a secondary ip but the"
4962 " new node doesn't have one",
4963 errors.ECODE_INVAL)
4965 # checks reachability
4966 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4967 raise errors.OpPrereqError("Node not reachable by ping",
4968 errors.ECODE_ENVIRON)
4970 if not newbie_singlehomed:
4971 # check reachability from my secondary ip to newbie's secondary ip
4972 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4973 source=myself.secondary_ip):
4974 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4975 " based ping to node daemon port",
4976 errors.ECODE_ENVIRON)
4978 if self.op.readd:
4979 exceptions = [node]
4980 else:
4981 exceptions = []
4983 if self.op.master_capable:
4984 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4985 else:
4986 self.master_candidate = False
4988 if self.op.readd:
4989 self.new_node = old_node
4990 else:
4991 node_group = cfg.LookupNodeGroup(self.op.group)
4992 self.new_node = objects.Node(name=node,
4993 primary_ip=primary_ip,
4994 secondary_ip=secondary_ip,
4995 master_candidate=self.master_candidate,
4996 offline=False, drained=False,
4997 group=node_group)
4999 if self.op.ndparams:
5000 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5002 def Exec(self, feedback_fn):
5003 """Adds the new node to the cluster.
5006 new_node = self.new_node
5007 node = new_node.name
5009 # We are adding a new node, so we assume it is powered
5010 new_node.powered = True
5012 # for re-adds, reset the offline/drained/master-candidate flags;
5013 # we need to reset here, otherwise offline would prevent RPC calls
5014 # later in the procedure; this also means that if the re-add
5015 # fails, we are left with a non-offlined, broken node
5016 if self.op.readd:
5017 new_node.drained = new_node.offline = False # pylint: disable=W0201
5018 self.LogInfo("Readding a node, the offline/drained flags were reset")
5019 # if we demote the node, we do cleanup later in the procedure
5020 new_node.master_candidate = self.master_candidate
5021 if self.changed_primary_ip:
5022 new_node.primary_ip = self.op.primary_ip
5024 # copy the master/vm_capable flags
5025 for attr in self._NFLAGS:
5026 setattr(new_node, attr, getattr(self.op, attr))
5028 # notify the user about any possible mc promotion
5029 if new_node.master_candidate:
5030 self.LogInfo("Node will be a master candidate")
5032 if self.op.ndparams:
5033 new_node.ndparams = self.op.ndparams
5035 new_node.ndparams = {}
5037 # check connectivity
5038 result = self.rpc.call_version([node])[node]
5039 result.Raise("Can't get version information from node %s" % node)
5040 if constants.PROTOCOL_VERSION == result.payload:
5041 logging.info("Communication to node %s fine, sw version %s match",
5042 node, result.payload)
5044 raise errors.OpExecError("Version mismatch master version %s,"
5045 " node version %s" %
5046 (constants.PROTOCOL_VERSION, result.payload))
5048 # Add node to our /etc/hosts, and add key to known_hosts
5049 if self.cfg.GetClusterInfo().modify_etc_hosts:
5050 master_node = self.cfg.GetMasterNode()
5051 result = self.rpc.call_etc_hosts_modify(master_node,
5052 constants.ETC_HOSTS_ADD,
5053 self.hostname.name,
5054 self.hostname.ip)
5055 result.Raise("Can't update hosts file with new host data")
5057 if new_node.secondary_ip != new_node.primary_ip:
5058 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5059 False)
5061 node_verify_list = [self.cfg.GetMasterNode()]
5062 node_verify_param = {
5063 constants.NV_NODELIST: [node],
5064 # TODO: do a node-net-test as well?
5065 }
5067 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5068 self.cfg.GetClusterName())
5069 for verifier in node_verify_list:
5070 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5071 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5072 if nl_payload:
5073 for failed in nl_payload:
5074 feedback_fn("ssh/hostname verification failed"
5075 " (checking from %s): %s" %
5076 (verifier, nl_payload[failed]))
5077 raise errors.OpExecError("ssh/hostname verification failed")
5079 if self.op.readd:
5080 _RedistributeAncillaryFiles(self)
5081 self.context.ReaddNode(new_node)
5082 # make sure we redistribute the config
5083 self.cfg.Update(new_node, feedback_fn)
5084 # and make sure the new node will not have old files around
5085 if not new_node.master_candidate:
5086 result = self.rpc.call_node_demote_from_mc(new_node.name)
5087 msg = result.fail_msg
5088 if msg:
5089 self.LogWarning("Node failed to demote itself from master"
5090 " candidate status: %s" % msg)
5091 else:
5092 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5093 additional_vm=self.op.vm_capable)
5094 self.context.AddNode(new_node, self.proc.GetECId())
5097 class LUNodeSetParams(LogicalUnit):
5098 """Modifies the parameters of a node.
5100 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5101 to the node role (as _ROLE_*)
5102 @cvar _R2F: a dictionary from node role to tuples of flags
5103 @cvar _FLAGS: a list of attribute names corresponding to the flags
5106 HPATH = "node-modify"
5107 HTYPE = constants.HTYPE_NODE
5109 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5110 _F2R = {
5111 (True, False, False): _ROLE_CANDIDATE,
5112 (False, True, False): _ROLE_DRAINED,
5113 (False, False, True): _ROLE_OFFLINE,
5114 (False, False, False): _ROLE_REGULAR,
5115 }
5116 _R2F = dict((v, k) for k, v in _F2R.items())
5117 _FLAGS = ["master_candidate", "drained", "offline"]
5119 def CheckArguments(self):
5120 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5121 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5122 self.op.master_capable, self.op.vm_capable,
5123 self.op.secondary_ip, self.op.ndparams]
5124 if all_mods.count(None) == len(all_mods):
5125 raise errors.OpPrereqError("Please pass at least one modification",
5126 errors.ECODE_INVAL)
5127 if all_mods.count(True) > 1:
5128 raise errors.OpPrereqError("Can't set the node into more than one"
5129 " state at the same time",
5130 errors.ECODE_INVAL)
5132 # Boolean value that tells us whether we might be demoting from MC
5133 self.might_demote = (self.op.master_candidate == False or
5134 self.op.offline == True or
5135 self.op.drained == True or
5136 self.op.master_capable == False)
5138 if self.op.secondary_ip:
5139 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5140 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5141 " address" % self.op.secondary_ip,
5142 errors.ECODE_INVAL)
5144 self.lock_all = self.op.auto_promote and self.might_demote
5145 self.lock_instances = self.op.secondary_ip is not None
5147 def ExpandNames(self):
5148 if self.lock_all:
5149 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5150 else:
5151 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5153 if self.lock_instances:
5154 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5156 def DeclareLocks(self, level):
5157 # If we have locked all instances, before waiting to lock nodes, release
5158 # all the ones living on nodes unrelated to the current operation.
5159 if level == locking.LEVEL_NODE and self.lock_instances:
5160 self.affected_instances = []
5161 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5162 instances_keep = []
5164 # Build list of instances to release
5165 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5166 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5167 if (instance.disk_template in constants.DTS_INT_MIRROR and
5168 self.op.node_name in instance.all_nodes):
5169 instances_keep.append(instance_name)
5170 self.affected_instances.append(instance)
5172 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5174 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5175 set(instances_keep))
5177 def BuildHooksEnv(self):
5180 This runs on the master node.
5183 return {
5184 "OP_TARGET": self.op.node_name,
5185 "MASTER_CANDIDATE": str(self.op.master_candidate),
5186 "OFFLINE": str(self.op.offline),
5187 "DRAINED": str(self.op.drained),
5188 "MASTER_CAPABLE": str(self.op.master_capable),
5189 "VM_CAPABLE": str(self.op.vm_capable),
5190 }
5192 def BuildHooksNodes(self):
5193 """Build hooks nodes.
5196 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5197 return (nl, nl)
5199 def CheckPrereq(self):
5200 """Check prerequisites.
5202 This only checks the instance list against the existing names.
5205 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5207 if (self.op.master_candidate is not None or
5208 self.op.drained is not None or
5209 self.op.offline is not None):
5210 # we can't change the master's node flags
5211 if self.op.node_name == self.cfg.GetMasterNode():
5212 raise errors.OpPrereqError("The master role can be changed"
5213 " only via master-failover",
5214 errors.ECODE_INVAL)
5216 if self.op.master_candidate and not node.master_capable:
5217 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5218 " it a master candidate" % node.name,
5221 if self.op.vm_capable == False:
5222 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5223 if ipri or isec:
5224 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5225 " the vm_capable flag" % node.name,
5226 errors.ECODE_STATE)
5228 if node.master_candidate and self.might_demote and not self.lock_all:
5229 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5230 # check if after removing the current node, we're missing master
5232 (mc_remaining, mc_should, _) = \
5233 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5234 if mc_remaining < mc_should:
5235 raise errors.OpPrereqError("Not enough master candidates, please"
5236 " pass auto promote option to allow"
5237 " promotion", errors.ECODE_STATE)
5239 self.old_flags = old_flags = (node.master_candidate,
5240 node.drained, node.offline)
5241 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5242 self.old_role = old_role = self._F2R[old_flags]
5244 # Check for ineffective changes
5245 for attr in self._FLAGS:
5246 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5247 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5248 setattr(self.op, attr, None)
5250 # Past this point, any flag change to False means a transition
5251 # away from the respective state, as only real changes are kept
5253 # TODO: We might query the real power state if it supports OOB
5254 if _SupportsOob(self.cfg, node):
5255 if self.op.offline is False and not (node.powered or
5256 self.op.powered == True):
5257 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5258 " offline status can be reset") %
5259 self.op.node_name)
5260 elif self.op.powered is not None:
5261 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5262 " as it does not support out-of-band"
5263 " handling") % self.op.node_name)
5265 # If we're being deofflined/drained, we'll MC ourself if needed
5266 if (self.op.drained == False or self.op.offline == False or
5267 (self.op.master_capable and not node.master_capable)):
5268 if _DecideSelfPromotion(self):
5269 self.op.master_candidate = True
5270 self.LogInfo("Auto-promoting node to master candidate")
5272 # If we're no longer master capable, we'll demote ourselves from MC
5273 if self.op.master_capable == False and node.master_candidate:
5274 self.LogInfo("Demoting from master candidate")
5275 self.op.master_candidate = False
5278 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5279 if self.op.master_candidate:
5280 new_role = self._ROLE_CANDIDATE
5281 elif self.op.drained:
5282 new_role = self._ROLE_DRAINED
5283 elif self.op.offline:
5284 new_role = self._ROLE_OFFLINE
5285 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5286 # False is still in new flags, which means we're un-setting (the
5288 new_role = self._ROLE_REGULAR
5289 else: # no new flags, nothing, keep old role
5290 new_role = old_role
5292 self.new_role = new_role
5294 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5295 # Trying to transition out of offline status
5296 result = self.rpc.call_version([node.name])[node.name]
5297 if result.fail_msg:
5298 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5299 " to report its version: %s" %
5300 (node.name, result.fail_msg),
5301 errors.ECODE_STATE)
5302 else:
5303 self.LogWarning("Transitioning node from offline to online state"
5304 " without using re-add. Please make sure the node"
5305 " is healthy!")
5307 if self.op.secondary_ip:
5308 # Ok even without locking, because this can't be changed by any LU
5309 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5310 master_singlehomed = master.secondary_ip == master.primary_ip
5311 if master_singlehomed and self.op.secondary_ip:
5312 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5313 " homed cluster", errors.ECODE_INVAL)
5315 if node.offline:
5316 if self.affected_instances:
5317 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5318 " node has instances (%s) configured"
5319 " to use it" % self.affected_instances)
5321 # On online nodes, check that no instances are running, and that
5322 # the node has the new ip and we can reach it.
5323 for instance in self.affected_instances:
5324 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5326 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5327 if master.name != node.name:
5328 # check reachability from master secondary ip to new secondary ip
5329 if not netutils.TcpPing(self.op.secondary_ip,
5330 constants.DEFAULT_NODED_PORT,
5331 source=master.secondary_ip):
5332 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5333 " based ping to node daemon port",
5334 errors.ECODE_ENVIRON)
5336 if self.op.ndparams:
5337 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5338 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5339 self.new_ndparams = new_ndparams
5341 def Exec(self, feedback_fn):
5345 node = self.node
5346 old_role = self.old_role
5347 new_role = self.new_role
5349 result = []
5351 if self.op.ndparams:
5352 node.ndparams = self.new_ndparams
5354 if self.op.powered is not None:
5355 node.powered = self.op.powered
5357 for attr in ["master_capable", "vm_capable"]:
5358 val = getattr(self.op, attr)
5359 if val is not None:
5360 setattr(node, attr, val)
5361 result.append((attr, str(val)))
5363 if new_role != old_role:
5364 # Tell the node to demote itself, if no longer MC and not offline
5365 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5366 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5367 if msg:
5368 self.LogWarning("Node failed to demote itself: %s", msg)
5370 new_flags = self._R2F[new_role]
5371 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5372 if of != nf:
5373 result.append((desc, str(nf)))
5374 (node.master_candidate, node.drained, node.offline) = new_flags
5376 # we locked all nodes, we adjust the CP before updating this node
5377 if self.lock_all:
5378 _AdjustCandidatePool(self, [node.name])
5380 if self.op.secondary_ip:
5381 node.secondary_ip = self.op.secondary_ip
5382 result.append(("secondary_ip", self.op.secondary_ip))
5384 # this will trigger configuration file update, if needed
5385 self.cfg.Update(node, feedback_fn)
5387 # this will trigger job queue propagation or cleanup if the mc
5389 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5390 self.context.ReaddNode(node)
5392 return result
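# The value returned above is a list of (parameter, new value) pairs
# describing what was actually changed, e.g. [("master_candidate", "True"),
# ("secondary_ip", "192.0.2.10")]; unchanged flags are not listed. The
# concrete values shown here are only an illustration.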
5395 class LUNodePowercycle(NoHooksLU):
5396 """Powercycles a node.
5401 def CheckArguments(self):
5402 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5403 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5404 raise errors.OpPrereqError("The node is the master and the force"
5405 " parameter was not set",
5406 errors.ECODE_INVAL)
5408 def ExpandNames(self):
5409 """Locking for PowercycleNode.
5411 This is a last-resort option and shouldn't block on other
5412 jobs. Therefore, we grab no locks.
5415 self.needed_locks = {}
5417 def Exec(self, feedback_fn):
5421 result = self.rpc.call_node_powercycle(self.op.node_name,
5422 self.cfg.GetHypervisorType())
5423 result.Raise("Failed to schedule the reboot")
5424 return result.payload
5427 class LUClusterQuery(NoHooksLU):
5428 """Query cluster configuration.
5433 def ExpandNames(self):
5434 self.needed_locks = {}
5436 def Exec(self, feedback_fn):
5437 """Return cluster config.
5440 cluster = self.cfg.GetClusterInfo()
5442 os_hvp = {}
5443 # Filter just for enabled hypervisors
5444 for os_name, hv_dict in cluster.os_hvp.items():
5445 os_hvp[os_name] = {}
5446 for hv_name, hv_params in hv_dict.items():
5447 if hv_name in cluster.enabled_hypervisors:
5448 os_hvp[os_name][hv_name] = hv_params
5450 # Convert ip_family to ip_version
5451 primary_ip_version = constants.IP4_VERSION
5452 if cluster.primary_ip_family == netutils.IP6Address.family:
5453 primary_ip_version = constants.IP6_VERSION
5455 result = {
5456 "software_version": constants.RELEASE_VERSION,
5457 "protocol_version": constants.PROTOCOL_VERSION,
5458 "config_version": constants.CONFIG_VERSION,
5459 "os_api_version": max(constants.OS_API_VERSIONS),
5460 "export_version": constants.EXPORT_VERSION,
5461 "architecture": (platform.architecture()[0], platform.machine()),
5462 "name": cluster.cluster_name,
5463 "master": cluster.master_node,
5464 "default_hypervisor": cluster.enabled_hypervisors[0],
5465 "enabled_hypervisors": cluster.enabled_hypervisors,
5466 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5467 for hypervisor_name in cluster.enabled_hypervisors]),
5468 "os_hvp": os_hvp,
5469 "beparams": cluster.beparams,
5470 "osparams": cluster.osparams,
5471 "nicparams": cluster.nicparams,
5472 "ndparams": cluster.ndparams,
5473 "candidate_pool_size": cluster.candidate_pool_size,
5474 "master_netdev": cluster.master_netdev,
5475 "volume_group_name": cluster.volume_group_name,
5476 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5477 "file_storage_dir": cluster.file_storage_dir,
5478 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5479 "maintain_node_health": cluster.maintain_node_health,
5480 "ctime": cluster.ctime,
5481 "mtime": cluster.mtime,
5482 "uuid": cluster.uuid,
5483 "tags": list(cluster.GetTags()),
5484 "uid_pool": cluster.uid_pool,
5485 "default_iallocator": cluster.default_iallocator,
5486 "reserved_lvs": cluster.reserved_lvs,
5487 "primary_ip_version": primary_ip_version,
5488 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5489 "hidden_os": cluster.hidden_os,
5490 "blacklisted_os": cluster.blacklisted_os,
5492 }
5494 return result
5496 class LUClusterConfigQuery(NoHooksLU):
5497 """Return configuration values.
5501 _FIELDS_DYNAMIC = utils.FieldSet()
5502 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5503 "watcher_pause", "volume_group_name")
5505 def CheckArguments(self):
5506 _CheckOutputFields(static=self._FIELDS_STATIC,
5507 dynamic=self._FIELDS_DYNAMIC,
5508 selected=self.op.output_fields)
5510 def ExpandNames(self):
5511 self.needed_locks = {}
5513 def Exec(self, feedback_fn):
5514 """Dump a representation of the cluster config to the standard output.
5517 values = []
5518 for field in self.op.output_fields:
5519 if field == "cluster_name":
5520 entry = self.cfg.GetClusterName()
5521 elif field == "master_node":
5522 entry = self.cfg.GetMasterNode()
5523 elif field == "drain_flag":
5524 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5525 elif field == "watcher_pause":
5526 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5527 elif field == "volume_group_name":
5528 entry = self.cfg.GetVGName()
5530 raise errors.ParameterError(field)
5531 values.append(entry)
5533 return values
5535 class LUInstanceActivateDisks(NoHooksLU):
5536 """Bring up an instance's disks.
5541 def ExpandNames(self):
5542 self._ExpandAndLockInstance()
5543 self.needed_locks[locking.LEVEL_NODE] = []
5544 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5546 def DeclareLocks(self, level):
5547 if level == locking.LEVEL_NODE:
5548 self._LockInstancesNodes()
5550 def CheckPrereq(self):
5551 """Check prerequisites.
5553 This checks that the instance is in the cluster.
5556 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5557 assert self.instance is not None, \
5558 "Cannot retrieve locked instance %s" % self.op.instance_name
5559 _CheckNodeOnline(self, self.instance.primary_node)
5561 def Exec(self, feedback_fn):
5562 """Activate the disks.
5565 disks_ok, disks_info = \
5566 _AssembleInstanceDisks(self, self.instance,
5567 ignore_size=self.op.ignore_size)
5568 if not disks_ok:
5569 raise errors.OpExecError("Cannot activate block devices")
5571 return disks_info
5574 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5575 ignore_size=False):
5576 """Prepare the block devices for an instance.
5578 This sets up the block devices on all nodes.
5580 @type lu: L{LogicalUnit}
5581 @param lu: the logical unit on whose behalf we execute
5582 @type instance: L{objects.Instance}
5583 @param instance: the instance for whose disks we assemble
5584 @type disks: list of L{objects.Disk} or None
5585 @param disks: which disks to assemble (or all, if None)
5586 @type ignore_secondaries: boolean
5587 @param ignore_secondaries: if true, errors on secondary nodes
5588 won't result in an error return from the function
5589 @type ignore_size: boolean
5590 @param ignore_size: if true, the current known size of the disk
5591 will not be used during the disk activation, useful for cases
5592 when the size is wrong
5593 @return: False if the operation failed, otherwise a list of
5594 (host, instance_visible_name, node_visible_name)
5595 with the mapping from node devices to instance devices
5598 device_info = []
5599 disks_ok = True
5600 iname = instance.name
5601 disks = _ExpandCheckDisks(instance, disks)
5603 # With the two-pass mechanism we try to reduce the window of
5604 # opportunity for the race condition of switching DRBD to primary
5605 # before handshaking occurred, but we do not eliminate it
5607 # The proper fix would be to wait (with some limits) until the
5608 # connection has been made and drbd transitions from WFConnection
5609 # into any other network-connected state (Connected, SyncTarget,
5612 # 1st pass, assemble on all nodes in secondary mode
5613 for idx, inst_disk in enumerate(disks):
5614 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5615 if ignore_size:
5616 node_disk = node_disk.Copy()
5617 node_disk.UnsetSize()
5618 lu.cfg.SetDiskID(node_disk, node)
5619 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5620 msg = result.fail_msg
5621 if msg:
5622 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5623 " (is_primary=False, pass=1): %s",
5624 inst_disk.iv_name, node, msg)
5625 if not ignore_secondaries:
5626 disks_ok = False
5628 # FIXME: race condition on drbd migration to primary
5630 # 2nd pass, do only the primary node
5631 for idx, inst_disk in enumerate(disks):
5632 dev_path = None
5634 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5635 if node != instance.primary_node:
5636 continue
5637 if ignore_size:
5638 node_disk = node_disk.Copy()
5639 node_disk.UnsetSize()
5640 lu.cfg.SetDiskID(node_disk, node)
5641 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5642 msg = result.fail_msg
5643 if msg:
5644 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5645 " (is_primary=True, pass=2): %s",
5646 inst_disk.iv_name, node, msg)
5647 disks_ok = False
5648 else:
5649 dev_path = result.payload
5651 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5653 # leave the disks configured for the primary node
5654 # this is a workaround that would be fixed better by
5655 # improving the logical/physical id handling
5656 for disk in disks:
5657 lu.cfg.SetDiskID(disk, instance.primary_node)
5659 return disks_ok, device_info
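# Editor's note, illustrative example of the returned values (hypothetical
# names and device path):
#   disks_ok == True
#   device_info == [("node1.example.com", "disk/0", "/dev/drbd0")]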
5662 def _StartInstanceDisks(lu, instance, force):
5663 """Start the disks of an instance.
5666 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5667 ignore_secondaries=force)
5668 if not disks_ok:
5669 _ShutdownInstanceDisks(lu, instance)
5670 if force is not None and not force:
5671 lu.proc.LogWarning("", hint="If the message above refers to a"
5672 " secondary node,"
5673 " you can retry the operation using '--force'.")
5674 raise errors.OpExecError("Disk consistency error")
5677 class LUInstanceDeactivateDisks(NoHooksLU):
5678 """Shutdown an instance's disks.
5683 def ExpandNames(self):
5684 self._ExpandAndLockInstance()
5685 self.needed_locks[locking.LEVEL_NODE] = []
5686 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5688 def DeclareLocks(self, level):
5689 if level == locking.LEVEL_NODE:
5690 self._LockInstancesNodes()
5692 def CheckPrereq(self):
5693 """Check prerequisites.
5695 This checks that the instance is in the cluster.
5698 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5699 assert self.instance is not None, \
5700 "Cannot retrieve locked instance %s" % self.op.instance_name
5702 def Exec(self, feedback_fn):
5703 """Deactivate the disks
5706 instance = self.instance
5708 _ShutdownInstanceDisks(self, instance)
5710 _SafeShutdownInstanceDisks(self, instance)
5713 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5714 """Shutdown block devices of an instance.
5716 This function checks if an instance is running, before calling
5717 _ShutdownInstanceDisks.
5720 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5721 _ShutdownInstanceDisks(lu, instance, disks=disks)
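# Editor's note: _SafeShutdownInstanceDisks only differs from
# _ShutdownInstanceDisks by first verifying via _CheckInstanceDown that the
# instance is stopped, so disks are never torn down under a running guest.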
5724 def _ExpandCheckDisks(instance, disks):
5725 """Return the instance disks selected by the disks list
5727 @type disks: list of L{objects.Disk} or None
5728 @param disks: selected disks
5729 @rtype: list of L{objects.Disk}
5730 @return: selected instance disks to act on
5733 if disks is None:
5734 return instance.disks
5736 if not set(disks).issubset(instance.disks):
5737 raise errors.ProgrammerError("Can only act on disks belonging to the"
5742 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5743 """Shutdown block devices of an instance.
5745 This does the shutdown on all nodes of the instance.
5747 If ignore_primary is false, errors on the primary node are not
5748 ignored and cause the shutdown to be reported as failed.
5752 disks = _ExpandCheckDisks(instance, disks)
5754 for disk in disks:
5755 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5756 lu.cfg.SetDiskID(top_disk, node)
5757 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5758 msg = result.fail_msg
5759 if msg:
5760 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5761 disk.iv_name, node, msg)
5762 if ((node == instance.primary_node and not ignore_primary) or
5763 (node != instance.primary_node and not result.offline)):
5768 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5769 """Checks if a node has enough free memory.
5771 This function checks if a given node has the needed amount of free
5772 memory. In case the node has less memory or we cannot get the
5773 information from the node, this function raises an OpPrereqError
5774 exception.
5776 @type lu: C{LogicalUnit}
5777 @param lu: a logical unit from which we get configuration data
5778 @type node: C{str}
5779 @param node: the node to check
5780 @type reason: C{str}
5781 @param reason: string to use in the error message
5782 @type requested: C{int}
5783 @param requested: the amount of memory in MiB to check for
5784 @type hypervisor_name: C{str}
5785 @param hypervisor_name: the hypervisor to ask for memory stats
5786 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5787 we cannot check the node
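# Editor's note, illustrative call with hypothetical values (checking 1024 MiB
# on one node before a start):
#   _CheckNodeFreeMemory(self, "node1.example.com",
#                        "starting instance inst1.example.com", 1024, "xen-pvm")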
5790 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5791 nodeinfo[node].Raise("Can't get data from node %s" % node,
5792 prereq=True, ecode=errors.ECODE_ENVIRON)
5793 free_mem = nodeinfo[node].payload.get("memory_free", None)
5794 if not isinstance(free_mem, int):
5795 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5796 " was '%s'" % (node, free_mem),
5797 errors.ECODE_ENVIRON)
5798 if requested > free_mem:
5799 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5800 " needed %s MiB, available %s MiB" %
5801 (node, reason, requested, free_mem),
5805 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5806 """Checks if nodes have enough free disk space in the all VGs.
5808 This function checks if all given nodes have the needed amount of
5809 free disk. In case any node has less disk or we cannot get the
5810 information from the node, this function raises an OpPrereqError
5811 exception.
5813 @type lu: C{LogicalUnit}
5814 @param lu: a logical unit from which we get configuration data
5815 @type nodenames: C{list}
5816 @param nodenames: the list of node names to check
5817 @type req_sizes: C{dict}
5818 @param req_sizes: the hash of vg and corresponding amount of disk in
5819 MiB that is requested
5820 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5821 or we cannot check the node
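# Editor's note, illustrative shape of req_sizes (hypothetical VG name):
#   {"xenvg": 10240}
# would require 10240 MiB of free space in volume group "xenvg" on every node.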
5824 for vg, req_size in req_sizes.items():
5825 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5828 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5829 """Checks if nodes have enough free disk space in the specified VG.
5831 This function checks if all given nodes have the needed amount of
5832 free disk. In case any node has less disk or we cannot get the
5833 information from the node, this function raises an OpPrereqError
5834 exception.
5836 @type lu: C{LogicalUnit}
5837 @param lu: a logical unit from which we get configuration data
5838 @type nodenames: C{list}
5839 @param nodenames: the list of node names to check
5841 @param vg: the volume group to check
5842 @type requested: C{int}
5843 @param requested: the amount of disk in MiB to check for
5844 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5845 or we cannot check the node
5848 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5849 for node in nodenames:
5850 info = nodeinfo[node]
5851 info.Raise("Cannot get current information from node %s" % node,
5852 prereq=True, ecode=errors.ECODE_ENVIRON)
5853 vg_free = info.payload.get("vg_free", None)
5854 if not isinstance(vg_free, int):
5855 raise errors.OpPrereqError("Can't compute free disk space on node"
5856 " %s for vg %s, result was '%s'" %
5857 (node, vg, vg_free), errors.ECODE_ENVIRON)
5858 if requested > vg_free:
5859 raise errors.OpPrereqError("Not enough disk space on target node %s"
5860 " vg %s: required %d MiB, available %d MiB" %
5861 (node, vg, requested, vg_free),
5865 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5866 """Checks if nodes have enough physical CPUs
5868 This function checks if all given nodes have the needed number of
5869 physical CPUs. In case any node has fewer CPUs or we cannot get the
5870 information from the node, this function raises an OpPrereqError
5871 exception.
5873 @type lu: C{LogicalUnit}
5874 @param lu: a logical unit from which we get configuration data
5875 @type nodenames: C{list}
5876 @param nodenames: the list of node names to check
5877 @type requested: C{int}
5878 @param requested: the minimum acceptable number of physical CPUs
5879 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5880 or we cannot check the node
5883 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5884 for node in nodenames:
5885 info = nodeinfo[node]
5886 info.Raise("Cannot get current information from node %s" % node,
5887 prereq=True, ecode=errors.ECODE_ENVIRON)
5888 num_cpus = info.payload.get("cpu_total", None)
5889 if not isinstance(num_cpus, int):
5890 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5891 " on node %s, result was '%s'" %
5892 (node, num_cpus), errors.ECODE_ENVIRON)
5893 if requested > num_cpus:
5894 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5895 "required" % (node, num_cpus, requested),
5899 class LUInstanceStartup(LogicalUnit):
5900 """Starts an instance.
5903 HPATH = "instance-start"
5904 HTYPE = constants.HTYPE_INSTANCE
5907 def CheckArguments(self):
5909 if self.op.beparams:
5910 # fill the beparams dict
5911 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5913 def ExpandNames(self):
5914 self._ExpandAndLockInstance()
5916 def BuildHooksEnv(self):
5919 This runs on master, primary and secondary nodes of the instance.
5923 "FORCE": self.op.force,
5926 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5930 def BuildHooksNodes(self):
5931 """Build hooks nodes.
5934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5937 def CheckPrereq(self):
5938 """Check prerequisites.
5940 This checks that the instance is in the cluster.
5943 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5944 assert self.instance is not None, \
5945 "Cannot retrieve locked instance %s" % self.op.instance_name
5948 if self.op.hvparams:
5949 # check hypervisor parameter syntax (locally)
5950 cluster = self.cfg.GetClusterInfo()
5951 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5952 filled_hvp = cluster.FillHV(instance)
5953 filled_hvp.update(self.op.hvparams)
5954 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5955 hv_type.CheckParameterSyntax(filled_hvp)
5956 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
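# Editor's note: the hypervisor parameters validated above are the cluster's
# view of the instance (FillHV) overlaid with the per-opcode overrides from
# self.op.hvparams, so only the merged result has to be syntactically valid.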
5958 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5960 if self.primary_offline and self.op.ignore_offline_nodes:
5961 self.proc.LogWarning("Ignoring offline primary node")
5963 if self.op.hvparams or self.op.beparams:
5964 self.proc.LogWarning("Overridden parameters are ignored")
5965 else:
5966 _CheckNodeOnline(self, instance.primary_node)
5968 bep = self.cfg.GetClusterInfo().FillBE(instance)
5970 # check bridges existence
5971 _CheckInstanceBridgesExist(self, instance)
5973 remote_info = self.rpc.call_instance_info(instance.primary_node,
5975 instance.hypervisor)
5976 remote_info.Raise("Error checking node %s" % instance.primary_node,
5977 prereq=True, ecode=errors.ECODE_ENVIRON)
5978 if not remote_info.payload: # not running already
5979 _CheckNodeFreeMemory(self, instance.primary_node,
5980 "starting instance %s" % instance.name,
5981 bep[constants.BE_MEMORY], instance.hypervisor)
5983 def Exec(self, feedback_fn):
5984 """Start the instance.
5987 instance = self.instance
5988 force = self.op.force
5990 if not self.op.no_remember:
5991 self.cfg.MarkInstanceUp(instance.name)
5993 if self.primary_offline:
5994 assert self.op.ignore_offline_nodes
5995 self.proc.LogInfo("Primary node offline, marked instance as started")
5996 else:
5997 node_current = instance.primary_node
5999 _StartInstanceDisks(self, instance, force)
6001 result = self.rpc.call_instance_start(node_current, instance,
6002 self.op.hvparams, self.op.beparams,
6003 self.op.startup_paused)
6004 msg = result.fail_msg
6005 if msg:
6006 _ShutdownInstanceDisks(self, instance)
6007 raise errors.OpExecError("Could not start instance: %s" % msg)
6010 class LUInstanceReboot(LogicalUnit):
6011 """Reboot an instance.
6014 HPATH = "instance-reboot"
6015 HTYPE = constants.HTYPE_INSTANCE
6018 def ExpandNames(self):
6019 self._ExpandAndLockInstance()
6021 def BuildHooksEnv(self):
6024 This runs on master, primary and secondary nodes of the instance.
6028 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6029 "REBOOT_TYPE": self.op.reboot_type,
6030 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6033 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6037 def BuildHooksNodes(self):
6038 """Build hooks nodes.
6041 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6044 def CheckPrereq(self):
6045 """Check prerequisites.
6047 This checks that the instance is in the cluster.
6050 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6051 assert self.instance is not None, \
6052 "Cannot retrieve locked instance %s" % self.op.instance_name
6054 _CheckNodeOnline(self, instance.primary_node)
6056 # check bridges existence
6057 _CheckInstanceBridgesExist(self, instance)
6059 def Exec(self, feedback_fn):
6060 """Reboot the instance.
6063 instance = self.instance
6064 ignore_secondaries = self.op.ignore_secondaries
6065 reboot_type = self.op.reboot_type
6067 remote_info = self.rpc.call_instance_info(instance.primary_node,
6069 instance.hypervisor)
6070 remote_info.Raise("Error checking node %s" % instance.primary_node)
6071 instance_running = bool(remote_info.payload)
6073 node_current = instance.primary_node
6075 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6076 constants.INSTANCE_REBOOT_HARD]:
6077 for disk in instance.disks:
6078 self.cfg.SetDiskID(disk, node_current)
6079 result = self.rpc.call_instance_reboot(node_current, instance,
6080 reboot_type,
6081 self.op.shutdown_timeout)
6082 result.Raise("Could not reboot instance")
6083 else:
6084 if instance_running:
6085 result = self.rpc.call_instance_shutdown(node_current, instance,
6086 self.op.shutdown_timeout)
6087 result.Raise("Could not shutdown instance for full reboot")
6088 _ShutdownInstanceDisks(self, instance)
6089 else:
6090 self.LogInfo("Instance %s was already stopped, starting now",
6091 instance.name)
6092 _StartInstanceDisks(self, instance, ignore_secondaries)
6093 result = self.rpc.call_instance_start(node_current, instance,
6095 msg = result.fail_msg
6097 _ShutdownInstanceDisks(self, instance)
6098 raise errors.OpExecError("Could not start instance for"
6099 " full reboot: %s" % msg)
6101 self.cfg.MarkInstanceUp(instance.name)
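# Editor's note: soft/hard reboots are delegated to the node daemon via
# call_instance_reboot, while a full reboot is emulated above by shutting the
# instance down (including its disks) and then starting it again normally.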
6104 class LUInstanceShutdown(LogicalUnit):
6105 """Shutdown an instance.
6108 HPATH = "instance-stop"
6109 HTYPE = constants.HTYPE_INSTANCE
6112 def ExpandNames(self):
6113 self._ExpandAndLockInstance()
6115 def BuildHooksEnv(self):
6118 This runs on master, primary and secondary nodes of the instance.
6121 env = _BuildInstanceHookEnvByObject(self, self.instance)
6122 env["TIMEOUT"] = self.op.timeout
6125 def BuildHooksNodes(self):
6126 """Build hooks nodes.
6129 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6132 def CheckPrereq(self):
6133 """Check prerequisites.
6135 This checks that the instance is in the cluster.
6138 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6139 assert self.instance is not None, \
6140 "Cannot retrieve locked instance %s" % self.op.instance_name
6142 self.primary_offline = \
6143 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6145 if self.primary_offline and self.op.ignore_offline_nodes:
6146 self.proc.LogWarning("Ignoring offline primary node")
6148 _CheckNodeOnline(self, self.instance.primary_node)
6150 def Exec(self, feedback_fn):
6151 """Shutdown the instance.
6154 instance = self.instance
6155 node_current = instance.primary_node
6156 timeout = self.op.timeout
6158 if not self.op.no_remember:
6159 self.cfg.MarkInstanceDown(instance.name)
6161 if self.primary_offline:
6162 assert self.op.ignore_offline_nodes
6163 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6164 else:
6165 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6166 msg = result.fail_msg
6167 if msg:
6168 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6170 _ShutdownInstanceDisks(self, instance)
6173 class LUInstanceReinstall(LogicalUnit):
6174 """Reinstall an instance.
6177 HPATH = "instance-reinstall"
6178 HTYPE = constants.HTYPE_INSTANCE
6181 def ExpandNames(self):
6182 self._ExpandAndLockInstance()
6184 def BuildHooksEnv(self):
6187 This runs on master, primary and secondary nodes of the instance.
6190 return _BuildInstanceHookEnvByObject(self, self.instance)
6192 def BuildHooksNodes(self):
6193 """Build hooks nodes.
6196 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6199 def CheckPrereq(self):
6200 """Check prerequisites.
6202 This checks that the instance is in the cluster and is not running.
6205 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6206 assert instance is not None, \
6207 "Cannot retrieve locked instance %s" % self.op.instance_name
6208 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6209 " offline, cannot reinstall")
6210 for node in instance.secondary_nodes:
6211 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6212 " cannot reinstall")
6214 if instance.disk_template == constants.DT_DISKLESS:
6215 raise errors.OpPrereqError("Instance '%s' has no disks" %
6216 self.op.instance_name,
6217 errors.ECODE_INVAL)
6218 _CheckInstanceDown(self, instance, "cannot reinstall")
6220 if self.op.os_type is not None:
6222 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6223 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6224 instance_os = self.op.os_type
6225 else:
6226 instance_os = instance.os
6228 nodelist = list(instance.all_nodes)
6230 if self.op.osparams:
6231 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6232 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6233 self.os_inst = i_osdict # the new dict (without defaults)
6237 self.instance = instance
6239 def Exec(self, feedback_fn):
6240 """Reinstall the instance.
6243 inst = self.instance
6245 if self.op.os_type is not None:
6246 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6247 inst.os = self.op.os_type
6248 # Write to configuration
6249 self.cfg.Update(inst, feedback_fn)
6251 _StartInstanceDisks(self, inst, None)
6252 try:
6253 feedback_fn("Running the instance OS create scripts...")
6254 # FIXME: pass debug option from opcode to backend
6255 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6256 self.op.debug_level,
6257 osparams=self.os_inst)
6258 result.Raise("Could not install OS for instance %s on node %s" %
6259 (inst.name, inst.primary_node))
6260 finally:
6261 _ShutdownInstanceDisks(self, inst)
6264 class LUInstanceRecreateDisks(LogicalUnit):
6265 """Recreate an instance's missing disks.
6268 HPATH = "instance-recreate-disks"
6269 HTYPE = constants.HTYPE_INSTANCE
6272 def CheckArguments(self):
6273 # normalise the disk list
6274 self.op.disks = sorted(frozenset(self.op.disks))
6276 def ExpandNames(self):
6277 self._ExpandAndLockInstance()
6278 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6279 if self.op.nodes:
6280 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6281 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6282 else:
6283 self.needed_locks[locking.LEVEL_NODE] = []
6285 def DeclareLocks(self, level):
6286 if level == locking.LEVEL_NODE:
6287 # if we replace the nodes, we only need to lock the old primary,
6288 # otherwise we need to lock all nodes for disk re-creation
6289 primary_only = bool(self.op.nodes)
6290 self._LockInstancesNodes(primary_only=primary_only)
6292 def BuildHooksEnv(self):
6295 This runs on master, primary and secondary nodes of the instance.
6298 return _BuildInstanceHookEnvByObject(self, self.instance)
6300 def BuildHooksNodes(self):
6301 """Build hooks nodes.
6304 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6307 def CheckPrereq(self):
6308 """Check prerequisites.
6310 This checks that the instance is in the cluster and is not running.
6313 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314 assert instance is not None, \
6315 "Cannot retrieve locked instance %s" % self.op.instance_name
6317 if len(self.op.nodes) != len(instance.all_nodes):
6318 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6319 " %d replacement nodes were specified" %
6320 (instance.name, len(instance.all_nodes),
6321 len(self.op.nodes)),
6323 assert instance.disk_template != constants.DT_DRBD8 or \
6324 len(self.op.nodes) == 2
6325 assert instance.disk_template != constants.DT_PLAIN or \
6326 len(self.op.nodes) == 1
6327 primary_node = self.op.nodes[0]
6328 else:
6329 primary_node = instance.primary_node
6330 _CheckNodeOnline(self, primary_node)
6332 if instance.disk_template == constants.DT_DISKLESS:
6333 raise errors.OpPrereqError("Instance '%s' has no disks" %
6334 self.op.instance_name, errors.ECODE_INVAL)
6335 # if we replace nodes *and* the old primary is offline, we don't
6336 # check whether the instance is down
6337 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6338 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6339 if not (self.op.nodes and old_pnode.offline):
6340 _CheckInstanceDown(self, instance, "cannot recreate disks")
6342 if not self.op.disks:
6343 self.op.disks = range(len(instance.disks))
6345 for idx in self.op.disks:
6346 if idx >= len(instance.disks):
6347 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6349 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6350 raise errors.OpPrereqError("Can't recreate disks partially and"
6351 " change the nodes at the same time",
6353 self.instance = instance
6355 def Exec(self, feedback_fn):
6356 """Recreate the disks.
6359 instance = self.instance
6361 to_skip = []
6362 mods = [] # keeps track of needed logical_id changes
6364 for idx, disk in enumerate(instance.disks):
6365 if idx not in self.op.disks: # disk idx has not been passed in
6366 to_skip.append(idx)
6367 continue
6368 # update secondaries for disks, if needed
6369 if self.op.nodes:
6370 if disk.dev_type == constants.LD_DRBD8:
6371 # need to update the nodes and minors
6372 assert len(self.op.nodes) == 2
6373 assert len(disk.logical_id) == 6 # otherwise disk internals
6375 (_, _, old_port, _, _, old_secret) = disk.logical_id
6376 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6377 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6378 new_minors[0], new_minors[1], old_secret)
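# Editor's note: a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the node names and
# the freshly allocated minors change here, port and secret are preserved.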
6379 assert len(disk.logical_id) == len(new_id)
6380 mods.append((idx, new_id))
6382 # now that we have passed all asserts above, we can apply the mods
6383 # in a single run (to avoid partial changes)
6384 for idx, new_id in mods:
6385 instance.disks[idx].logical_id = new_id
6387 # change primary node, if needed
6388 if self.op.nodes:
6389 instance.primary_node = self.op.nodes[0]
6390 self.LogWarning("Changing the instance's nodes, you will have to"
6391 " remove any disks left on the older nodes manually")
6394 self.cfg.Update(instance, feedback_fn)
6396 _CreateDisks(self, instance, to_skip=to_skip)
6399 class LUInstanceRename(LogicalUnit):
6400 """Rename an instance.
6403 HPATH = "instance-rename"
6404 HTYPE = constants.HTYPE_INSTANCE
6406 def CheckArguments(self):
6410 if self.op.ip_check and not self.op.name_check:
6411 # TODO: make the ip check more flexible and not depend on the name check
6412 raise errors.OpPrereqError("IP address check requires a name check",
6415 def BuildHooksEnv(self):
6418 This runs on master, primary and secondary nodes of the instance.
6421 env = _BuildInstanceHookEnvByObject(self, self.instance)
6422 env["INSTANCE_NEW_NAME"] = self.op.new_name
6425 def BuildHooksNodes(self):
6426 """Build hooks nodes.
6429 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6432 def CheckPrereq(self):
6433 """Check prerequisites.
6435 This checks that the instance is in the cluster and is not running.
6438 self.op.instance_name = _ExpandInstanceName(self.cfg,
6439 self.op.instance_name)
6440 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6441 assert instance is not None
6442 _CheckNodeOnline(self, instance.primary_node)
6443 _CheckInstanceDown(self, instance, "cannot rename")
6444 self.instance = instance
6446 new_name = self.op.new_name
6447 if self.op.name_check:
6448 hostname = netutils.GetHostname(name=new_name)
6449 if hostname != new_name:
6450 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6451 hostname.name)
6452 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6453 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6454 " same as given hostname '%s'") %
6455 (hostname.name, self.op.new_name),
6457 new_name = self.op.new_name = hostname.name
6458 if (self.op.ip_check and
6459 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6460 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6461 (hostname.ip, new_name),
6462 errors.ECODE_NOTUNIQUE)
6464 instance_list = self.cfg.GetInstanceList()
6465 if new_name in instance_list and new_name != instance.name:
6466 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6467 new_name, errors.ECODE_EXISTS)
6469 def Exec(self, feedback_fn):
6470 """Rename the instance.
6473 inst = self.instance
6474 old_name = inst.name
6476 rename_file_storage = False
6477 if (inst.disk_template in constants.DTS_FILEBASED and
6478 self.op.new_name != inst.name):
6479 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6480 rename_file_storage = True
6482 self.cfg.RenameInstance(inst.name, self.op.new_name)
6483 # Change the instance lock. This is definitely safe while we hold the BGL.
6484 # Otherwise the new lock would have to be added in acquired mode.
6486 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6487 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6489 # re-read the instance from the configuration after rename
6490 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6492 if rename_file_storage:
6493 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6494 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6495 old_file_storage_dir,
6496 new_file_storage_dir)
6497 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6498 " (but the instance has been renamed in Ganeti)" %
6499 (inst.primary_node, old_file_storage_dir,
6500 new_file_storage_dir))
6502 _StartInstanceDisks(self, inst, None)
6504 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6505 old_name, self.op.debug_level)
6506 msg = result.fail_msg
6508 msg = ("Could not run OS rename script for instance %s on node %s"
6509 " (but the instance has been renamed in Ganeti): %s" %
6510 (inst.name, inst.primary_node, msg))
6511 self.proc.LogWarning(msg)
6513 _ShutdownInstanceDisks(self, inst)
6518 class LUInstanceRemove(LogicalUnit):
6519 """Remove an instance.
6522 HPATH = "instance-remove"
6523 HTYPE = constants.HTYPE_INSTANCE
6526 def ExpandNames(self):
6527 self._ExpandAndLockInstance()
6528 self.needed_locks[locking.LEVEL_NODE] = []
6529 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6531 def DeclareLocks(self, level):
6532 if level == locking.LEVEL_NODE:
6533 self._LockInstancesNodes()
6535 def BuildHooksEnv(self):
6538 This runs on master, primary and secondary nodes of the instance.
6541 env = _BuildInstanceHookEnvByObject(self, self.instance)
6542 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6545 def BuildHooksNodes(self):
6546 """Build hooks nodes.
6549 nl = [self.cfg.GetMasterNode()]
6550 nl_post = list(self.instance.all_nodes) + nl
6551 return (nl, nl_post)
6553 def CheckPrereq(self):
6554 """Check prerequisites.
6556 This checks that the instance is in the cluster.
6559 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6560 assert self.instance is not None, \
6561 "Cannot retrieve locked instance %s" % self.op.instance_name
6563 def Exec(self, feedback_fn):
6564 """Remove the instance.
6567 instance = self.instance
6568 logging.info("Shutting down instance %s on node %s",
6569 instance.name, instance.primary_node)
6571 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6572 self.op.shutdown_timeout)
6573 msg = result.fail_msg
6574 if msg:
6575 if self.op.ignore_failures:
6576 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6577 else:
6578 raise errors.OpExecError("Could not shutdown instance %s on"
6580 (instance.name, instance.primary_node, msg))
6582 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6585 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6586 """Utility function to remove an instance.
6589 logging.info("Removing block devices for instance %s", instance.name)
6591 if not _RemoveDisks(lu, instance):
6592 if not ignore_failures:
6593 raise errors.OpExecError("Can't remove instance's disks")
6594 feedback_fn("Warning: can't remove instance's disks")
6596 logging.info("Removing instance %s out of cluster config", instance.name)
6598 lu.cfg.RemoveInstance(instance.name)
6600 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6601 "Instance lock removal conflict"
6603 # Remove lock for the instance
6604 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6607 class LUInstanceQuery(NoHooksLU):
6608 """Logical unit for querying instances.
6611 # pylint: disable=W0142
6614 def CheckArguments(self):
6615 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6616 self.op.output_fields, self.op.use_locking)
6618 def ExpandNames(self):
6619 self.iq.ExpandNames(self)
6621 def DeclareLocks(self, level):
6622 self.iq.DeclareLocks(self, level)
6624 def Exec(self, feedback_fn):
6625 return self.iq.OldStyleQuery(self)
6628 class LUInstanceFailover(LogicalUnit):
6629 """Failover an instance.
6632 HPATH = "instance-failover"
6633 HTYPE = constants.HTYPE_INSTANCE
6636 def CheckArguments(self):
6637 """Check the arguments.
6640 self.iallocator = getattr(self.op, "iallocator", None)
6641 self.target_node = getattr(self.op, "target_node", None)
6643 def ExpandNames(self):
6644 self._ExpandAndLockInstance()
6646 if self.op.target_node is not None:
6647 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6649 self.needed_locks[locking.LEVEL_NODE] = []
6650 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6652 ignore_consistency = self.op.ignore_consistency
6653 shutdown_timeout = self.op.shutdown_timeout
6654 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6657 ignore_consistency=ignore_consistency,
6658 shutdown_timeout=shutdown_timeout)
6659 self.tasklets = [self._migrater]
6661 def DeclareLocks(self, level):
6662 if level == locking.LEVEL_NODE:
6663 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6664 if instance.disk_template in constants.DTS_EXT_MIRROR:
6665 if self.op.target_node is None:
6666 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6667 else:
6668 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6669 self.op.target_node]
6670 del self.recalculate_locks[locking.LEVEL_NODE]
6671 else:
6672 self._LockInstancesNodes()
6674 def BuildHooksEnv(self):
6677 This runs on master, primary and secondary nodes of the instance.
6680 instance = self._migrater.instance
6681 source_node = instance.primary_node
6682 target_node = self.op.target_node
6684 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6685 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6686 "OLD_PRIMARY": source_node,
6687 "NEW_PRIMARY": target_node,
6690 if instance.disk_template in constants.DTS_INT_MIRROR:
6691 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6692 env["NEW_SECONDARY"] = source_node
6694 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6696 env.update(_BuildInstanceHookEnvByObject(self, instance))
6700 def BuildHooksNodes(self):
6701 """Build hooks nodes.
6704 instance = self._migrater.instance
6705 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6706 return (nl, nl + [instance.primary_node])
6709 class LUInstanceMigrate(LogicalUnit):
6710 """Migrate an instance.
6712 This is migration without shutting down, compared to the failover,
6713 which is done with shutdown.
6716 HPATH = "instance-migrate"
6717 HTYPE = constants.HTYPE_INSTANCE
6720 def ExpandNames(self):
6721 self._ExpandAndLockInstance()
6723 if self.op.target_node is not None:
6724 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6726 self.needed_locks[locking.LEVEL_NODE] = []
6727 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6729 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6730 cleanup=self.op.cleanup,
6732 fallback=self.op.allow_failover)
6733 self.tasklets = [self._migrater]
6735 def DeclareLocks(self, level):
6736 if level == locking.LEVEL_NODE:
6737 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6738 if instance.disk_template in constants.DTS_EXT_MIRROR:
6739 if self.op.target_node is None:
6740 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6742 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6743 self.op.target_node]
6744 del self.recalculate_locks[locking.LEVEL_NODE]
6746 self._LockInstancesNodes()
6748 def BuildHooksEnv(self):
6751 This runs on master, primary and secondary nodes of the instance.
6754 instance = self._migrater.instance
6755 source_node = instance.primary_node
6756 target_node = self.op.target_node
6757 env = _BuildInstanceHookEnvByObject(self, instance)
6759 "MIGRATE_LIVE": self._migrater.live,
6760 "MIGRATE_CLEANUP": self.op.cleanup,
6761 "OLD_PRIMARY": source_node,
6762 "NEW_PRIMARY": target_node,
6765 if instance.disk_template in constants.DTS_INT_MIRROR:
6766 env["OLD_SECONDARY"] = target_node
6767 env["NEW_SECONDARY"] = source_node
6769 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6773 def BuildHooksNodes(self):
6774 """Build hooks nodes.
6777 instance = self._migrater.instance
6778 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6779 return (nl, nl + [instance.primary_node])
6782 class LUInstanceMove(LogicalUnit):
6783 """Move an instance by data-copying.
6786 HPATH = "instance-move"
6787 HTYPE = constants.HTYPE_INSTANCE
6790 def ExpandNames(self):
6791 self._ExpandAndLockInstance()
6792 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6793 self.op.target_node = target_node
6794 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6795 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6797 def DeclareLocks(self, level):
6798 if level == locking.LEVEL_NODE:
6799 self._LockInstancesNodes(primary_only=True)
6801 def BuildHooksEnv(self):
6804 This runs on master, primary and secondary nodes of the instance.
6808 "TARGET_NODE": self.op.target_node,
6809 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6811 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6814 def BuildHooksNodes(self):
6815 """Build hooks nodes.
6819 self.cfg.GetMasterNode(),
6820 self.instance.primary_node,
6821 self.op.target_node,
6825 def CheckPrereq(self):
6826 """Check prerequisites.
6828 This checks that the instance is in the cluster.
6831 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6832 assert self.instance is not None, \
6833 "Cannot retrieve locked instance %s" % self.op.instance_name
6835 node = self.cfg.GetNodeInfo(self.op.target_node)
6836 assert node is not None, \
6837 "Cannot retrieve locked node %s" % self.op.target_node
6839 self.target_node = target_node = node.name
6841 if target_node == instance.primary_node:
6842 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6843 (instance.name, target_node),
6846 bep = self.cfg.GetClusterInfo().FillBE(instance)
6848 for idx, dsk in enumerate(instance.disks):
6849 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6850 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6851 " cannot copy" % idx, errors.ECODE_STATE)
6853 _CheckNodeOnline(self, target_node)
6854 _CheckNodeNotDrained(self, target_node)
6855 _CheckNodeVmCapable(self, target_node)
6857 if instance.admin_up:
6858 # check memory requirements on the secondary node
6859 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6860 instance.name, bep[constants.BE_MEMORY],
6861 instance.hypervisor)
6862 else:
6863 self.LogInfo("Not checking memory on the secondary node as"
6864 " instance will not be started")
6866 # check bridge existence
6867 _CheckInstanceBridgesExist(self, instance, node=target_node)
6869 def Exec(self, feedback_fn):
6870 """Move an instance.
6872 The move is done by shutting it down on its present node, copying
6873 the data over (slow) and starting it on the new node.
6876 instance = self.instance
6878 source_node = instance.primary_node
6879 target_node = self.target_node
6881 self.LogInfo("Shutting down instance %s on source node %s",
6882 instance.name, source_node)
6884 result = self.rpc.call_instance_shutdown(source_node, instance,
6885 self.op.shutdown_timeout)
6886 msg = result.fail_msg
6887 if msg:
6888 if self.op.ignore_consistency:
6889 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6890 " Proceeding anyway. Please make sure node"
6891 " %s is down. Error details: %s",
6892 instance.name, source_node, source_node, msg)
6893 else:
6894 raise errors.OpExecError("Could not shutdown instance %s on"
6896 (instance.name, source_node, msg))
6898 # create the target disks
6899 try:
6900 _CreateDisks(self, instance, target_node=target_node)
6901 except errors.OpExecError:
6902 self.LogWarning("Device creation failed, reverting...")
6903 try:
6904 _RemoveDisks(self, instance, target_node=target_node)
6905 finally:
6906 self.cfg.ReleaseDRBDMinors(instance.name)
6907 raise
6909 cluster_name = self.cfg.GetClusterInfo().cluster_name
6911 errs = []
6912 # activate, get path, copy the data over
6913 for idx, disk in enumerate(instance.disks):
6914 self.LogInfo("Copying data for disk %d", idx)
6915 result = self.rpc.call_blockdev_assemble(target_node, disk,
6916 instance.name, True, idx)
6917 if result.fail_msg:
6918 self.LogWarning("Can't assemble newly created disk %d: %s",
6919 idx, result.fail_msg)
6920 errs.append(result.fail_msg)
6921 continue
6922 dev_path = result.payload
6923 result = self.rpc.call_blockdev_export(source_node, disk,
6924 target_node, dev_path,
6925 cluster_name)
6926 if result.fail_msg:
6927 self.LogWarning("Can't copy data over for disk %d: %s",
6928 idx, result.fail_msg)
6929 errs.append(result.fail_msg)
6932 if errs:
6933 self.LogWarning("Some disks failed to copy, aborting")
6934 try:
6935 _RemoveDisks(self, instance, target_node=target_node)
6936 finally:
6937 self.cfg.ReleaseDRBDMinors(instance.name)
6938 raise errors.OpExecError("Errors during disk copy: %s" %
6941 instance.primary_node = target_node
6942 self.cfg.Update(instance, feedback_fn)
6944 self.LogInfo("Removing the disks on the original node")
6945 _RemoveDisks(self, instance, target_node=source_node)
6947 # Only start the instance if it's marked as up
6948 if instance.admin_up:
6949 self.LogInfo("Starting instance %s on node %s",
6950 instance.name, target_node)
6952 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6953 ignore_secondaries=True)
6955 _ShutdownInstanceDisks(self, instance)
6956 raise errors.OpExecError("Can't activate the instance's disks")
6958 result = self.rpc.call_instance_start(target_node, instance,
6960 msg = result.fail_msg
6962 _ShutdownInstanceDisks(self, instance)
6963 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6964 (instance.name, target_node, msg))
6967 class LUNodeMigrate(LogicalUnit):
6968 """Migrate all instances from a node.
6971 HPATH = "node-migrate"
6972 HTYPE = constants.HTYPE_NODE
6975 def CheckArguments(self):
6978 def ExpandNames(self):
6979 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6981 self.share_locks = _ShareAll()
6982 self.needed_locks = {
6983 locking.LEVEL_NODE: [self.op.node_name],
6986 def BuildHooksEnv(self):
6989 This runs on the master, the primary and all the secondaries.
6993 "NODE_NAME": self.op.node_name,
6996 def BuildHooksNodes(self):
6997 """Build hooks nodes.
7000 nl = [self.cfg.GetMasterNode()]
7003 def CheckPrereq(self):
7006 def Exec(self, feedback_fn):
7007 # Prepare jobs for migration instances
7008 jobs = [
7009 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7012 iallocator=self.op.iallocator,
7013 target_node=self.op.target_node)]
7014 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7015 ]
7017 # TODO: Run iallocator in this opcode and pass correct placement options to
7018 # OpInstanceMigrate. Since other jobs can modify the cluster between
7019 # running the iallocator and the actual migration, a good consistency model
7020 # will have to be found.
7022 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7023 frozenset([self.op.node_name]))
7025 return ResultWithJobs(jobs)
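# Editor's note: one single-opcode job is submitted per primary instance on
# the node, e.g. a node with three primary instances yields three independent
# OpInstanceMigrate jobs, whose job IDs end up in this opcode's result.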
7028 class TLMigrateInstance(Tasklet):
7029 """Tasklet class for instance migration.
7032 @ivar live: whether the migration will be done live or non-live;
7033 this variable is initialized only after CheckPrereq has run
7034 @type cleanup: boolean
7035 @ivar cleanup: Whether we clean up from a failed migration
7036 @type iallocator: string
7037 @ivar iallocator: The iallocator used to determine target_node
7038 @type target_node: string
7039 @ivar target_node: If given, the target_node to reallocate the instance to
7040 @type failover: boolean
7041 @ivar failover: Whether operation results in failover or migration
7042 @type fallback: boolean
7043 @ivar fallback: Whether fallback to failover is allowed if migration is not
7044 possible
7045 @type ignore_consistency: boolean
7046 @ivar ignore_consistency: Whether we should ignore consistency between source
7047 and target nodes
7048 @type shutdown_timeout: int
7049 @ivar shutdown_timeout: in case of failover, the timeout to use for the shutdown
7054 _MIGRATION_POLL_INTERVAL = 1 # seconds
7055 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7057 def __init__(self, lu, instance_name, cleanup=False,
7058 failover=False, fallback=False,
7059 ignore_consistency=False,
7060 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7061 """Initializes this class.
7064 Tasklet.__init__(self, lu)
7067 self.instance_name = instance_name
7068 self.cleanup = cleanup
7069 self.live = False # will be overridden later
7070 self.failover = failover
7071 self.fallback = fallback
7072 self.ignore_consistency = ignore_consistency
7073 self.shutdown_timeout = shutdown_timeout
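# Editor's note (illustrative): LUInstanceFailover above builds this tasklet
# with ignore_consistency and shutdown_timeout taken from its opcode, while
# LUInstanceMigrate passes cleanup=self.op.cleanup and
# fallback=self.op.allow_failover; flags not passed keep the defaults above.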
7075 def CheckPrereq(self):
7076 """Check prerequisites.
7078 This checks that the instance is in the cluster.
7081 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7082 instance = self.cfg.GetInstanceInfo(instance_name)
7083 assert instance is not None
7084 self.instance = instance
7086 if (not self.cleanup and not instance.admin_up and not self.failover and
7087 self.fallback):
7088 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7089 " to failover")
7090 self.failover = True
7092 if instance.disk_template not in constants.DTS_MIRRORED:
7097 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7098 " %s" % (instance.disk_template, text),
7101 if instance.disk_template in constants.DTS_EXT_MIRROR:
7102 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7104 if self.lu.op.iallocator:
7105 self._RunAllocator()
7106 else:
7107 # We set self.target_node as it is required by later code
7109 self.target_node = self.lu.op.target_node
7111 # self.target_node is already populated, either directly or by the
7112 # iallocator run
7113 target_node = self.target_node
7114 if self.target_node == instance.primary_node:
7115 raise errors.OpPrereqError("Cannot migrate instance %s"
7116 " to its primary (%s)" %
7117 (instance.name, instance.primary_node))
7119 if len(self.lu.tasklets) == 1:
7120 # It is safe to release locks only when we're the only tasklet
7122 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7123 keep=[instance.primary_node, self.target_node])
7125 else:
7126 secondary_nodes = instance.secondary_nodes
7127 if not secondary_nodes:
7128 raise errors.ConfigurationError("No secondary node but using"
7129 " %s disk template" %
7130 instance.disk_template)
7131 target_node = secondary_nodes[0]
7132 if self.lu.op.iallocator or (self.lu.op.target_node and
7133 self.lu.op.target_node != target_node):
7135 text = "failed over"
7138 raise errors.OpPrereqError("Instances with disk template %s cannot"
7139 " be %s to arbitrary nodes"
7140 " (neither an iallocator nor a target"
7141 " node can be passed)" %
7142 (instance.disk_template, text),
7145 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7147 # check memory requirements on the secondary node
7148 if not self.failover or instance.admin_up:
7149 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7150 instance.name, i_be[constants.BE_MEMORY],
7151 instance.hypervisor)
7152 else:
7153 self.lu.LogInfo("Not checking memory on the secondary node as"
7154 " instance will not be started")
7156 # check bridge existance
7157 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7159 if not self.cleanup:
7160 _CheckNodeNotDrained(self.lu, target_node)
7161 if not self.failover:
7162 result = self.rpc.call_instance_migratable(instance.primary_node,
7163 instance)
7164 if result.fail_msg and self.fallback:
7165 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7166 " failover")
7167 self.failover = True
7168 else:
7169 result.Raise("Can't migrate, please use failover",
7170 prereq=True, ecode=errors.ECODE_STATE)
7172 assert not (self.failover and self.cleanup)
7174 if not self.failover:
7175 if self.lu.op.live is not None and self.lu.op.mode is not None:
7176 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7177 " parameters are accepted",
7179 if self.lu.op.live is not None:
7181 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7183 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7184 # reset the 'live' parameter to None so that repeated
7185 # invocations of CheckPrereq do not raise an exception
7186 self.lu.op.live = None
7187 elif self.lu.op.mode is None:
7188 # read the default value from the hypervisor
7189 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7191 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7193 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7194 else:
7195 # Failover is never live
7196 self.live = False
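# Editor's summary of the resolution above: an explicit live=True/False is
# translated into mode HT_MIGRATION_LIVE/HT_MIGRATION_NONLIVE, an unset mode
# falls back to the hypervisor's HV_MIGRATION_MODE default, and a failover is
# always treated as non-live.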
7198 def _RunAllocator(self):
7199 """Run the allocator based on input opcode.
7202 ial = IAllocator(self.cfg, self.rpc,
7203 mode=constants.IALLOCATOR_MODE_RELOC,
7204 name=self.instance_name,
7205 # TODO See why hail breaks with a single node below
7206 relocate_from=[self.instance.primary_node,
7207 self.instance.primary_node],
7210 ial.Run(self.lu.op.iallocator)
7212 if not ial.success:
7213 raise errors.OpPrereqError("Can't compute nodes using"
7214 " iallocator '%s': %s" %
7215 (self.lu.op.iallocator, ial.info),
7217 if len(ial.result) != ial.required_nodes:
7218 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7219 " of nodes (%s), required %s" %
7220 (self.lu.op.iallocator, len(ial.result),
7221 ial.required_nodes), errors.ECODE_FAULT)
7222 self.target_node = ial.result[0]
7223 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7224 self.instance_name, self.lu.op.iallocator,
7225 utils.CommaJoin(ial.result))
7227 def _WaitUntilSync(self):
7228 """Poll with custom rpc for disk sync.
7230 This uses our own step-based rpc call.
7233 self.feedback_fn("* wait until resync is done")
7234 all_done = False
7235 while not all_done:
7236 all_done = True
7237 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7238 self.nodes_ip,
7239 self.instance.disks)
7240 min_percent = 100
7241 for node, nres in result.items():
7242 nres.Raise("Cannot resync disks on node %s" % node)
7243 node_done, node_percent = nres.payload
7244 all_done = all_done and node_done
7245 if node_percent is not None:
7246 min_percent = min(min_percent, node_percent)
7248 if min_percent < 100:
7249 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7252 def _EnsureSecondary(self, node):
7253 """Demote a node to secondary.
7256 self.feedback_fn("* switching node %s to secondary mode" % node)
7258 for dev in self.instance.disks:
7259 self.cfg.SetDiskID(dev, node)
7261 result = self.rpc.call_blockdev_close(node, self.instance.name,
7262 self.instance.disks)
7263 result.Raise("Cannot change disk to secondary on node %s" % node)
7265 def _GoStandalone(self):
7266 """Disconnect from the network.
7269 self.feedback_fn("* changing into standalone mode")
7270 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7271 self.instance.disks)
7272 for node, nres in result.items():
7273 nres.Raise("Cannot disconnect disks node %s" % node)
7275 def _GoReconnect(self, multimaster):
7276 """Reconnect to the network.
7282 msg = "single-master"
7283 self.feedback_fn("* changing disks into %s mode" % msg)
7284 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7285 self.instance.disks,
7286 self.instance.name, multimaster)
7287 for node, nres in result.items():
7288 nres.Raise("Cannot change disks config on node %s" % node)
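# Editor's note: _EnsureSecondary/_GoStandalone/_GoReconnect are the small
# DRBD state helpers used below; a migration chains them roughly as
# secondary -> standalone -> reconnected in multi-master mode, and cleanup or
# failback reconnects in single-master mode once only one node runs the
# instance.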
7290 def _ExecCleanup(self):
7291 """Try to cleanup after a failed migration.
7293 The cleanup is done by:
7294 - check that the instance is running only on one node
7295 (and update the config if needed)
7296 - change disks on its secondary node to secondary
7297 - wait until disks are fully synchronized
7298 - disconnect from the network
7299 - change disks into single-master mode
7300 - wait again until disks are fully synchronized
7303 instance = self.instance
7304 target_node = self.target_node
7305 source_node = self.source_node
7307 # check running on only one node
7308 self.feedback_fn("* checking where the instance actually runs"
7309 " (if this hangs, the hypervisor might be in"
7311 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7312 for node, result in ins_l.items():
7313 result.Raise("Can't contact node %s" % node)
7315 runningon_source = instance.name in ins_l[source_node].payload
7316 runningon_target = instance.name in ins_l[target_node].payload
7318 if runningon_source and runningon_target:
7319 raise errors.OpExecError("Instance seems to be running on two nodes,"
7320 " or the hypervisor is confused; you will have"
7321 " to ensure manually that it runs only on one"
7322 " and restart this operation")
7324 if not (runningon_source or runningon_target):
7325 raise errors.OpExecError("Instance does not seem to be running at all;"
7326 " in this case it's safer to repair by"
7327 " running 'gnt-instance stop' to ensure disk"
7328 " shutdown, and then restarting it")
7330 if runningon_target:
7331 # the migration has actually succeeded, we need to update the config
7332 self.feedback_fn("* instance running on secondary node (%s),"
7333 " updating config" % target_node)
7334 instance.primary_node = target_node
7335 self.cfg.Update(instance, self.feedback_fn)
7336 demoted_node = source_node
7337 else:
7338 self.feedback_fn("* instance confirmed to be running on its"
7339 " primary node (%s)" % source_node)
7340 demoted_node = target_node
7342 if instance.disk_template in constants.DTS_INT_MIRROR:
7343 self._EnsureSecondary(demoted_node)
7344 try:
7345 self._WaitUntilSync()
7346 except errors.OpExecError:
7347 # we ignore here errors, since if the device is standalone, it
7348 # won't be able to sync
7349 pass
7350 self._GoStandalone()
7351 self._GoReconnect(False)
7352 self._WaitUntilSync()
7354 self.feedback_fn("* done")
7356 def _RevertDiskStatus(self):
7357 """Try to revert the disk status after a failed migration.
7360 target_node = self.target_node
7361 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7362 return
7364 try:
7365 self._EnsureSecondary(target_node)
7366 self._GoStandalone()
7367 self._GoReconnect(False)
7368 self._WaitUntilSync()
7369 except errors.OpExecError, err:
7370 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7371 " please try to recover the instance manually;"
7372 " error '%s'" % str(err))
7374 def _AbortMigration(self):
7375 """Call the hypervisor code to abort a started migration.
7378 instance = self.instance
7379 target_node = self.target_node
7380 source_node = self.source_node
7381 migration_info = self.migration_info
7383 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7387 abort_msg = abort_result.fail_msg
7388 if abort_msg:
7389 logging.error("Aborting migration failed on target node %s: %s",
7390 target_node, abort_msg)
7391 # Don't raise an exception here, as we still have to try to revert the
7392 # disk status, even if this step failed.
7394 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7395 instance, False, self.live)
7396 abort_msg = abort_result.fail_msg
7398 logging.error("Aborting migration failed on source node %s: %s",
7399 source_node, abort_msg)
7401 def _ExecMigration(self):
7402 """Migrate an instance.
7404 The migrate is done by:
7405 - change the disks into dual-master mode
7406 - wait until disks are fully synchronized again
7407 - migrate the instance
7408 - change disks on the new secondary node (the old primary) to secondary
7409 - wait until disks are fully synchronized
7410 - change disks into single-master mode
7413 instance = self.instance
7414 target_node = self.target_node
7415 source_node = self.source_node
7417 self.feedback_fn("* checking disk consistency between source and target")
7418 for dev in instance.disks:
7419 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7420 raise errors.OpExecError("Disk %s is degraded or not fully"
7421 " synchronized on target node,"
7422 " aborting migration" % dev.iv_name)
7424 # First get the migration information from the remote node
7425 result = self.rpc.call_migration_info(source_node, instance)
7426 msg = result.fail_msg
7427 if msg:
7428 log_err = ("Failed fetching source migration information from %s: %s" %
7429 (source_node, msg))
7430 logging.error(log_err)
7431 raise errors.OpExecError(log_err)
7433 self.migration_info = migration_info = result.payload
7435 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7436 # Then switch the disks to master/master mode
7437 self._EnsureSecondary(target_node)
7438 self._GoStandalone()
7439 self._GoReconnect(True)
7440 self._WaitUntilSync()
7442 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7443 result = self.rpc.call_accept_instance(target_node,
7446 self.nodes_ip[target_node])
7448 msg = result.fail_msg
7450 logging.error("Instance pre-migration failed, trying to revert"
7451 " disk status: %s", msg)
7452 self.feedback_fn("Pre-migration failed, aborting")
7453 self._AbortMigration()
7454 self._RevertDiskStatus()
7455 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7456 (instance.name, msg))
7458 self.feedback_fn("* migrating instance to %s" % target_node)
7459 result = self.rpc.call_instance_migrate(source_node, instance,
7460 self.nodes_ip[target_node],
7462 msg = result.fail_msg
7464 logging.error("Instance migration failed, trying to revert"
7465 " disk status: %s", msg)
7466 self.feedback_fn("Migration failed, aborting")
7467 self._AbortMigration()
7468 self._RevertDiskStatus()
7469 raise errors.OpExecError("Could not migrate instance %s: %s" %
7470 (instance.name, msg))
7472 self.feedback_fn("* starting memory transfer")
7473 last_feedback = time.time()
7475 result = self.rpc.call_instance_get_migration_status(source_node,
7477 msg = result.fail_msg
7478 ms = result.payload # MigrationStatus instance
7479 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7480 logging.error("Instance migration failed, trying to revert"
7481 " disk status: %s", msg)
7482 self.feedback_fn("Migration failed, aborting")
7483 self._AbortMigration()
7484 self._RevertDiskStatus()
7485 raise errors.OpExecError("Could not migrate instance %s: %s" %
7486 (instance.name, msg))
7488 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7489 self.feedback_fn("* memory transfer complete")
7492 if (utils.TimeoutExpired(last_feedback,
7493 self._MIGRATION_FEEDBACK_INTERVAL) and
7494 ms.transferred_ram is not None):
7495 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7496 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7497 last_feedback = time.time()
7499 time.sleep(self._MIGRATION_POLL_INTERVAL)
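# Editor's note: the loop above polls the source node every
# _MIGRATION_POLL_INTERVAL (1 second) and only emits a progress line roughly
# every _MIGRATION_FEEDBACK_INTERVAL (10 seconds) when transferred_ram data is
# available in the MigrationStatus payload.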
7501 result = self.rpc.call_instance_finalize_migration_src(source_node,
7505 msg = result.fail_msg
7507 logging.error("Instance migration succeeded, but finalization failed"
7508 " on the source node: %s", msg)
7509 raise errors.OpExecError("Could not finalize instance migration: %s" %
7512 instance.primary_node = target_node
7514 # distribute new instance config to the other nodes
7515 self.cfg.Update(instance, self.feedback_fn)
7517 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7521 msg = result.fail_msg
7523 logging.error("Instance migration succeeded, but finalization failed"
7524 " on the target node: %s", msg)
7525 raise errors.OpExecError("Could not finalize instance migration: %s" %
7528 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7529 self._EnsureSecondary(source_node)
7530 self._WaitUntilSync()
7531 self._GoStandalone()
7532 self._GoReconnect(False)
7533 self._WaitUntilSync()
7535 self.feedback_fn("* done")
7537 def _ExecFailover(self):
7538 """Failover an instance.
7540 The failover is done by shutting it down on its present node and
7541 starting it on the secondary.
7544 instance = self.instance
7545 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7547 source_node = instance.primary_node
7548 target_node = self.target_node
7550 if instance.admin_up:
7551 self.feedback_fn("* checking disk consistency between source and target")
7552 for dev in instance.disks:
7553 # for drbd, these are drbd over lvm
7554 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7555 if primary_node.offline:
7556 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7558 (primary_node.name, dev.iv_name, target_node))
7559 elif not self.ignore_consistency:
7560 raise errors.OpExecError("Disk %s is degraded on target node,"
7561 " aborting failover" % dev.iv_name)
7563 self.feedback_fn("* not checking disk consistency as instance is not"
7566 self.feedback_fn("* shutting down instance on source node")
7567 logging.info("Shutting down instance %s on node %s",
7568 instance.name, source_node)
7570 result = self.rpc.call_instance_shutdown(source_node, instance,
7571 self.shutdown_timeout)
7572 msg = result.fail_msg
7574 if self.ignore_consistency or primary_node.offline:
7575 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7576 " proceeding anyway; please make sure node"
7577 " %s is down; error details: %s",
7578 instance.name, source_node, source_node, msg)
7580 raise errors.OpExecError("Could not shutdown instance %s on"
7582 (instance.name, source_node, msg))
7584 self.feedback_fn("* deactivating the instance's disks on source node")
7585 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7586 raise errors.OpExecError("Can't shut down the instance's disks")
7588 instance.primary_node = target_node
7589 # distribute new instance config to the other nodes
7590 self.cfg.Update(instance, self.feedback_fn)
7592 # Only start the instance if it's marked as up
7593 if instance.admin_up:
7594 self.feedback_fn("* activating the instance's disks on target node %s" %
7596 logging.info("Starting instance %s on node %s",
7597 instance.name, target_node)
7599 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7600 ignore_secondaries=True)
7602 _ShutdownInstanceDisks(self.lu, instance)
7603 raise errors.OpExecError("Can't activate the instance's disks")
7605 self.feedback_fn("* starting the instance on the target node %s" %
7607 result = self.rpc.call_instance_start(target_node, instance, None, None,
7609 msg = result.fail_msg
7611 _ShutdownInstanceDisks(self.lu, instance)
7612 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7613 (instance.name, target_node, msg))
7615 def Exec(self, feedback_fn):
7616 """Perform the migration.
7619 self.feedback_fn = feedback_fn
7620 self.source_node = self.instance.primary_node
7622 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7623 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7624 self.target_node = self.instance.secondary_nodes[0]
7625 # Otherwise self.target_node has been populated either
7626 # directly, or through an iallocator.
7628 self.all_nodes = [self.source_node, self.target_node]
7629 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7630 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7633 feedback_fn("Failover instance %s" % self.instance.name)
7634 self._ExecFailover()
7636 feedback_fn("Migrating instance %s" % self.instance.name)
7639 return self._ExecCleanup()
7641 return self._ExecMigration()
7644 def _CreateBlockDev(lu, node, instance, device, force_create,
7646 """Create a tree of block devices on a given node.
7648 If this device type has to be created on secondaries, create it and
7651 If not, just recurse to children keeping the same 'force' value.
7653 @param lu: the lu on whose behalf we execute
7654 @param node: the node on which to create the device
7655 @type instance: L{objects.Instance}
7656 @param instance: the instance which owns the device
7657 @type device: L{objects.Disk}
7658 @param device: the device to create
7659 @type force_create: boolean
7660 @param force_create: whether to force creation of this device; this
7661 will be changed to True whenever we find a device which has
7662 CreateOnSecondary() attribute
7663 @param info: the extra 'metadata' we should attach to the device
7664 (this will be represented as a LVM tag)
7665 @type force_open: boolean
7666 @param force_open: this parameter will be passed to the
7667 L{backend.BlockdevCreate} function where it specifies
7668 whether we run on primary or not, and it affects both
7669 the child assembly and the device's own Open() execution
7672 if device.CreateOnSecondary():
7676 for child in device.children:
7677 _CreateBlockDev(lu, node, instance, child, force_create,
7680 if not force_create:
7683 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7686 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7687 """Create a single block device on a given node.
7689 This will not recurse over children of the device, so they must be
7692 @param lu: the lu on whose behalf we execute
7693 @param node: the node on which to create the device
7694 @type instance: L{objects.Instance}
7695 @param instance: the instance which owns the device
7696 @type device: L{objects.Disk}
7697 @param device: the device to create
7698 @param info: the extra 'metadata' we should attach to the device
7699 (this will be represented as a LVM tag)
7700 @type force_open: boolean
7701 @param force_open: this parameter will be passed to the
7702 L{backend.BlockdevCreate} function where it specifies
7703 whether we run on primary or not, and it affects both
7704 the child assembly and the device's own Open() execution
7707 lu.cfg.SetDiskID(device, node)
7708 result = lu.rpc.call_blockdev_create(node, device, device.size,
7709 instance.name, force_open, info)
7710 result.Raise("Can't create block device %s on"
7711 " node %s for instance %s" % (device, node, instance.name))
7712 if device.physical_id is None:
7713 device.physical_id = result.payload
7716 def _GenerateUniqueNames(lu, exts):
7717 """Generate a suitable LV name.
7719 This will generate a logical volume name for the given instance.
7724 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7725 results.append("%s%s" % (new_id, val))
7729 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7730 iv_name, p_minor, s_minor):
7731 """Generate a drbd8 device complete with its children.
7734 assert len(vgnames) == len(names) == 2
7735 port = lu.cfg.AllocatePort()
7736 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7737 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7738 logical_id=(vgnames[0], names[0]))
7739 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7740 logical_id=(vgnames[1], names[1]))
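# Sketch of the resulting layout (informational note): each DRBD8 disk is
# backed by two local LVs, a data LV of the requested size and a fixed
# 128 MiB metadata LV, which become the children of the LD_DRBD8 device
# built below; its logical_id carries the two nodes, the allocated port and
# the DRBD minors.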
7741 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7742 logical_id=(primary, secondary, port,
7745 children=[dev_data, dev_meta],
7750 def _GenerateDiskTemplate(lu, template_name,
7751 instance_name, primary_node,
7752 secondary_nodes, disk_info,
7753 file_storage_dir, file_driver,
7754 base_index, feedback_fn):
7755 """Generate the entire disk layout for a given template type.
7758 #TODO: compute space requirements
7760 vgname = lu.cfg.GetVGName()
7761 disk_count = len(disk_info)
7763 if template_name == constants.DT_DISKLESS:
7765 elif template_name == constants.DT_PLAIN:
7766 if len(secondary_nodes) != 0:
7767 raise errors.ProgrammerError("Wrong template configuration")
7769 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7770 for i in range(disk_count)])
7771 for idx, disk in enumerate(disk_info):
7772 disk_index = idx + base_index
7773 vg = disk.get(constants.IDISK_VG, vgname)
7774 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7775 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7776 size=disk[constants.IDISK_SIZE],
7777 logical_id=(vg, names[idx]),
7778 iv_name="disk/%d" % disk_index,
7779 mode=disk[constants.IDISK_MODE])
7780 disks.append(disk_dev)
7781 elif template_name == constants.DT_DRBD8:
7782 if len(secondary_nodes) != 1:
7783 raise errors.ProgrammerError("Wrong template configuration")
7784 remote_node = secondary_nodes[0]
7785 minors = lu.cfg.AllocateDRBDMinor(
7786 [primary_node, remote_node] * len(disk_info), instance_name)
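# AllocateDRBDMinor is asked for one minor per (node, disk) pair, so for N
# disks the returned list interleaves primary and secondary minors; disk idx
# therefore uses minors[2 * idx] and minors[2 * idx + 1], as consumed below.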
7789 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7790 for i in range(disk_count)]):
7791 names.append(lv_prefix + "_data")
7792 names.append(lv_prefix + "_meta")
7793 for idx, disk in enumerate(disk_info):
7794 disk_index = idx + base_index
7795 data_vg = disk.get(constants.IDISK_VG, vgname)
7796 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7797 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7798 disk[constants.IDISK_SIZE],
7800 names[idx * 2:idx * 2 + 2],
7801 "disk/%d" % disk_index,
7802 minors[idx * 2], minors[idx * 2 + 1])
7803 disk_dev.mode = disk[constants.IDISK_MODE]
7804 disks.append(disk_dev)
7805 elif template_name == constants.DT_FILE:
7806 if len(secondary_nodes) != 0:
7807 raise errors.ProgrammerError("Wrong template configuration")
7809 opcodes.RequireFileStorage()
7811 for idx, disk in enumerate(disk_info):
7812 disk_index = idx + base_index
7813 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7814 size=disk[constants.IDISK_SIZE],
7815 iv_name="disk/%d" % disk_index,
7816 logical_id=(file_driver,
7817 "%s/disk%d" % (file_storage_dir,
7819 mode=disk[constants.IDISK_MODE])
7820 disks.append(disk_dev)
7821 elif template_name == constants.DT_SHARED_FILE:
7822 if len(secondary_nodes) != 0:
7823 raise errors.ProgrammerError("Wrong template configuration")
7825 opcodes.RequireSharedFileStorage()
7827 for idx, disk in enumerate(disk_info):
7828 disk_index = idx + base_index
7829 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7830 size=disk[constants.IDISK_SIZE],
7831 iv_name="disk/%d" % disk_index,
7832 logical_id=(file_driver,
7833 "%s/disk%d" % (file_storage_dir,
7835 mode=disk[constants.IDISK_MODE])
7836 disks.append(disk_dev)
7837 elif template_name == constants.DT_BLOCK:
7838 if len(secondary_nodes) != 0:
7839 raise errors.ProgrammerError("Wrong template configuration")
7841 for idx, disk in enumerate(disk_info):
7842 disk_index = idx + base_index
7843 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7844 size=disk[constants.IDISK_SIZE],
7845 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7846 disk[constants.IDISK_ADOPT]),
7847 iv_name="disk/%d" % disk_index,
7848 mode=disk[constants.IDISK_MODE])
7849 disks.append(disk_dev)
7852 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
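# Illustrative result (assumed input): for DT_PLAIN with a single
# {IDISK_SIZE: 1024, IDISK_MODE: "rw"} disk and base_index 0, the generated
# list holds one objects.Disk(dev_type=LD_LV, size=1024, iv_name="disk/0")
# whose logical_id is (vgname, "<generated-unique-id>.disk0").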
7856 def _GetInstanceInfoText(instance):
7857 """Compute that text that should be added to the disk's metadata.
7860 return "originstname+%s" % instance.name
7863 def _CalcEta(time_taken, written, total_size):
7864 """Calculates the ETA based on size written and total size.
7866 @param time_taken: The time taken so far
7867 @param written: amount written so far
7868 @param total_size: The total size of data to be written
7869 @return: The remaining time in seconds
7872 avg_time = time_taken / float(written)
7873 return (total_size - written) * avg_time
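# Illustrative arithmetic for _CalcEta (values assumed for the example):
# having written 512 MiB of 2048 MiB in 30 seconds gives
# avg_time = 30 / 512.0 ~= 0.0586 s per MiB, so the remaining
# 2048 - 512 = 1536 MiB need about 1536 * 0.0586 ~= 90 seconds:
#   _CalcEta(30, 512, 2048)  # -> 90.0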
7876 def _WipeDisks(lu, instance):
7877 """Wipes instance disks.
7879 @type lu: L{LogicalUnit}
7880 @param lu: the logical unit on whose behalf we execute
7881 @type instance: L{objects.Instance}
7882 @param instance: the instance whose disks we should wipe
7883 @return: the success of the wipe
7886 node = instance.primary_node
7888 for device in instance.disks:
7889 lu.cfg.SetDiskID(device, node)
7891 logging.info("Pause sync of instance %s disks", instance.name)
7892 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7894 for idx, success in enumerate(result.payload):
7896 logging.warn("pause-sync of instance %s for disk %d failed",
7900 for idx, device in enumerate(instance.disks):
7901 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7902 # MAX_WIPE_CHUNK at max
7903 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7904 constants.MIN_WIPE_CHUNK_PERCENT)
7905 # we _must_ make this an int, otherwise rounding errors will
7907 wipe_chunk_size = int(wipe_chunk_size)
7909 lu.LogInfo("* Wiping disk %d", idx)
7910 logging.info("Wiping disk %d for instance %s, node %s using"
7911 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7916 start_time = time.time()
7918 while offset < size:
7919 wipe_size = min(wipe_chunk_size, size - offset)
7920 logging.debug("Wiping disk %d, offset %s, chunk %s",
7921 idx, offset, wipe_size)
7922 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7923 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7924 (idx, offset, wipe_size))
7927 if now - last_output >= 60:
7928 eta = _CalcEta(now - start_time, offset, size)
7929 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7930 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7933 logging.info("Resume sync of instance %s disks", instance.name)
7935 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7937 for idx, success in enumerate(result.payload):
7939 lu.LogWarning("Resume sync of disk %d failed, please have a"
7940 " look at the status and troubleshoot the issue", idx)
7941 logging.warn("resume-sync of instance %s for disk %d failed",
7945 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7946 """Create all disks for an instance.
7948 This abstracts away some work from AddInstance.
7950 @type lu: L{LogicalUnit}
7951 @param lu: the logical unit on whose behalf we execute
7952 @type instance: L{objects.Instance}
7953 @param instance: the instance whose disks we should create
7955 @param to_skip: list of indices to skip
7956 @type target_node: string
7957 @param target_node: if passed, overrides the target node for creation
7959 @return: the success of the creation
7962 info = _GetInstanceInfoText(instance)
7963 if target_node is None:
7964 pnode = instance.primary_node
7965 all_nodes = instance.all_nodes
7970 if instance.disk_template in constants.DTS_FILEBASED:
7971 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7972 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7974 result.Raise("Failed to create directory '%s' on"
7975 " node %s" % (file_storage_dir, pnode))
7977 # Note: this needs to be kept in sync with adding of disks in
7978 # LUInstanceSetParams
7979 for idx, device in enumerate(instance.disks):
7980 if to_skip and idx in to_skip:
7982 logging.info("Creating volume %s for instance %s",
7983 device.iv_name, instance.name)
7985 for node in all_nodes:
7986 f_create = node == pnode
7987 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
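# Note: f_create doubles as both force_create and force_open in the call
# above, so the device tree is forcibly created and opened only on the
# primary node, while secondaries (e.g. the DRBD peer) get the devices
# assembled but not opened.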
7990 def _RemoveDisks(lu, instance, target_node=None):
7991 """Remove all disks for an instance.
7993 This abstracts away some work from `AddInstance()` and
7994 `RemoveInstance()`. Note that in case some of the devices couldn't
7995 be removed, the removal will continue with the other ones (compare
7996 with `_CreateDisks()`).
7998 @type lu: L{LogicalUnit}
7999 @param lu: the logical unit on whose behalf we execute
8000 @type instance: L{objects.Instance}
8001 @param instance: the instance whose disks we should remove
8002 @type target_node: string
8003 @param target_node: used to override the node on which to remove the disks
8005 @return: the success of the removal
8008 logging.info("Removing block devices for instance %s", instance.name)
8011 for device in instance.disks:
8013 edata = [(target_node, device)]
8015 edata = device.ComputeNodeTree(instance.primary_node)
8016 for node, disk in edata:
8017 lu.cfg.SetDiskID(disk, node)
8018 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8020 lu.LogWarning("Could not remove block device %s on node %s,"
8021 " continuing anyway: %s", device.iv_name, node, msg)
8024 if instance.disk_template == constants.DT_FILE:
8025 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8029 tgt = instance.primary_node
8030 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8032 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8033 file_storage_dir, instance.primary_node, result.fail_msg)
8039 def _ComputeDiskSizePerVG(disk_template, disks):
8040 """Compute disk size requirements in the volume group
8043 def _compute(disks, payload):
8044 """Universal algorithm.
8049 vgs[disk[constants.IDISK_VG]] = \
8050 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8054 # Required free disk space as a function of disk and swap space
8056 constants.DT_DISKLESS: {},
8057 constants.DT_PLAIN: _compute(disks, 0),
8058 # 128 MB are added for drbd metadata for each disk
8059 constants.DT_DRBD8: _compute(disks, 128),
8060 constants.DT_FILE: {},
8061 constants.DT_SHARED_FILE: {},
8064 if disk_template not in req_size_dict:
8065 raise errors.ProgrammerError("Disk template '%s' size requirement"
8066 " is unknown" % disk_template)
8068 return req_size_dict[disk_template]
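# Illustrative example (assumed input): for DT_DRBD8 with
# disks = [{IDISK_VG: "xenvg", IDISK_SIZE: 1024}, {IDISK_VG: "xenvg", IDISK_SIZE: 2048}]
# the result would be {"xenvg": (1024 + 128) + (2048 + 128)} = {"xenvg": 3328} MiB.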
8071 def _ComputeDiskSize(disk_template, disks):
8072 """Compute disk size requirements in the volume group
8075 # Required free disk space as a function of disk and swap space
8077 constants.DT_DISKLESS: None,
8078 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8079 # 128 MB are added for drbd metadata for each disk
8080 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8081 constants.DT_FILE: None,
8082 constants.DT_SHARED_FILE: 0,
8083 constants.DT_BLOCK: 0,
8086 if disk_template not in req_size_dict:
8087 raise errors.ProgrammerError("Disk template '%s' size requirement"
8088 " is unknown" % disk_template)
8090 return req_size_dict[disk_template]
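# Illustrative example (assumed sizes): two disks of 1024 and 2048 MiB need
# 3072 MiB under DT_PLAIN and (1024 + 128) + (2048 + 128) = 3328 MiB under
# DT_DRBD8; DISKLESS/FILE report None and SHARED_FILE/BLOCK report 0.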
8093 def _FilterVmNodes(lu, nodenames):
8094 """Filters out non-vm_capable nodes from a list.
8096 @type lu: L{LogicalUnit}
8097 @param lu: the logical unit for which we check
8098 @type nodenames: list
8099 @param nodenames: the list of nodes on which we should check
8101 @return: the list of vm-capable nodes
8104 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8105 return [name for name in nodenames if name not in vm_nodes]
8108 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8109 """Hypervisor parameter validation.
8111 This function abstracts the hypervisor parameter validation to be
8112 used in both instance create and instance modify.
8114 @type lu: L{LogicalUnit}
8115 @param lu: the logical unit for which we check
8116 @type nodenames: list
8117 @param nodenames: the list of nodes on which we should check
8118 @type hvname: string
8119 @param hvname: the name of the hypervisor we should use
8120 @type hvparams: dict
8121 @param hvparams: the parameters which we need to check
8122 @raise errors.OpPrereqError: if the parameters are not valid
8125 nodenames = _FilterVmNodes(lu, nodenames)
8126 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8129 for node in nodenames:
8133 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8136 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8137 """OS parameters validation.
8139 @type lu: L{LogicalUnit}
8140 @param lu: the logical unit for which we check
8141 @type required: boolean
8142 @param required: whether the validation should fail if the OS is not
8144 @type nodenames: list
8145 @param nodenames: the list of nodes on which we should check
8146 @type osname: string
8147 @param osname: the name of the OS we should use
8148 @type osparams: dict
8149 @param osparams: the parameters which we need to check
8150 @raise errors.OpPrereqError: if the parameters are not valid
8153 nodenames = _FilterVmNodes(lu, nodenames)
8154 result = lu.rpc.call_os_validate(required, nodenames, osname,
8155 [constants.OS_VALIDATE_PARAMETERS],
8157 for node, nres in result.items():
8158 # we don't check for offline cases since this should be run only
8159 # against the master node and/or an instance's nodes
8160 nres.Raise("OS Parameters validation failed on node %s" % node)
8161 if not nres.payload:
8162 lu.LogInfo("OS %s not found on node %s, validation skipped",
8166 class LUInstanceCreate(LogicalUnit):
8167 """Create an instance.
8170 HPATH = "instance-add"
8171 HTYPE = constants.HTYPE_INSTANCE
8174 def CheckArguments(self):
8178 # do not require name_check to ease forward/backward compatibility
8180 if self.op.no_install and self.op.start:
8181 self.LogInfo("No-installation mode selected, disabling startup")
8182 self.op.start = False
8183 # validate/normalize the instance name
8184 self.op.instance_name = \
8185 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8187 if self.op.ip_check and not self.op.name_check:
8188 # TODO: make the ip check more flexible and not depend on the name check
8189 raise errors.OpPrereqError("Cannot do IP address check without a name"
8190 " check", errors.ECODE_INVAL)
8192 # check nics' parameter names
8193 for nic in self.op.nics:
8194 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8196 # check disks: parameter names and consistent adopt/no-adopt strategy
8197 has_adopt = has_no_adopt = False
8198 for disk in self.op.disks:
8199 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8200 if constants.IDISK_ADOPT in disk:
8204 if has_adopt and has_no_adopt:
8205 raise errors.OpPrereqError("Either all disks are adopted or none is",
8208 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8209 raise errors.OpPrereqError("Disk adoption is not supported for the"
8210 " '%s' disk template" %
8211 self.op.disk_template,
8213 if self.op.iallocator is not None:
8214 raise errors.OpPrereqError("Disk adoption not allowed with an"
8215 " iallocator script", errors.ECODE_INVAL)
8216 if self.op.mode == constants.INSTANCE_IMPORT:
8217 raise errors.OpPrereqError("Disk adoption not allowed for"
8218 " instance import", errors.ECODE_INVAL)
8220 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8221 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8222 " but no 'adopt' parameter given" %
8223 self.op.disk_template,
8226 self.adopt_disks = has_adopt
8228 # instance name verification
8229 if self.op.name_check:
8230 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8231 self.op.instance_name = self.hostname1.name
8232 # used in CheckPrereq for ip ping check
8233 self.check_ip = self.hostname1.ip
8235 self.check_ip = None
8237 # file storage checks
8238 if (self.op.file_driver and
8239 not self.op.file_driver in constants.FILE_DRIVER):
8240 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8241 self.op.file_driver, errors.ECODE_INVAL)
8243 if self.op.disk_template == constants.DT_FILE:
8244 opcodes.RequireFileStorage()
8245 elif self.op.disk_template == constants.DT_SHARED_FILE:
8246 opcodes.RequireSharedFileStorage()
8248 ### Node/iallocator related checks
8249 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8251 if self.op.pnode is not None:
8252 if self.op.disk_template in constants.DTS_INT_MIRROR:
8253 if self.op.snode is None:
8254 raise errors.OpPrereqError("The networked disk templates need"
8255 " a mirror node", errors.ECODE_INVAL)
8257 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8259 self.op.snode = None
8261 self._cds = _GetClusterDomainSecret()
8263 if self.op.mode == constants.INSTANCE_IMPORT:
8264 # On import force_variant must be True, because if we forced it at
8265 # initial install, our only chance when importing it back is that it
8267 self.op.force_variant = True
8269 if self.op.no_install:
8270 self.LogInfo("No-installation mode has no effect during import")
8272 elif self.op.mode == constants.INSTANCE_CREATE:
8273 if self.op.os_type is None:
8274 raise errors.OpPrereqError("No guest OS specified",
8276 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8277 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8278 " installation" % self.op.os_type,
8280 if self.op.disk_template is None:
8281 raise errors.OpPrereqError("No disk template specified",
8284 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8285 # Check handshake to ensure both clusters have the same domain secret
8286 src_handshake = self.op.source_handshake
8287 if not src_handshake:
8288 raise errors.OpPrereqError("Missing source handshake",
8291 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8294 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8297 # Load and check source CA
8298 self.source_x509_ca_pem = self.op.source_x509_ca
8299 if not self.source_x509_ca_pem:
8300 raise errors.OpPrereqError("Missing source X509 CA",
8304 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8306 except OpenSSL.crypto.Error, err:
8307 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8308 (err, ), errors.ECODE_INVAL)
8310 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8311 if errcode is not None:
8312 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8315 self.source_x509_ca = cert
8317 src_instance_name = self.op.source_instance_name
8318 if not src_instance_name:
8319 raise errors.OpPrereqError("Missing source instance name",
8322 self.source_instance_name = \
8323 netutils.GetHostname(name=src_instance_name).name
8326 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8327 self.op.mode, errors.ECODE_INVAL)
8329 def ExpandNames(self):
8330 """ExpandNames for CreateInstance.
8332 Figure out the right locks for instance creation.
8335 self.needed_locks = {}
8337 instance_name = self.op.instance_name
8338 # this is just a preventive check, but someone might still add this
8339 # instance in the meantime, and creation will fail at lock-add time
8340 if instance_name in self.cfg.GetInstanceList():
8341 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8342 instance_name, errors.ECODE_EXISTS)
8344 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8346 if self.op.iallocator:
8347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8349 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8350 nodelist = [self.op.pnode]
8351 if self.op.snode is not None:
8352 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8353 nodelist.append(self.op.snode)
8354 self.needed_locks[locking.LEVEL_NODE] = nodelist
8356 # in case of import lock the source node too
8357 if self.op.mode == constants.INSTANCE_IMPORT:
8358 src_node = self.op.src_node
8359 src_path = self.op.src_path
8361 if src_path is None:
8362 self.op.src_path = src_path = self.op.instance_name
8364 if src_node is None:
8365 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8366 self.op.src_node = None
8367 if os.path.isabs(src_path):
8368 raise errors.OpPrereqError("Importing an instance from a path"
8369 " requires a source node option",
8372 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8373 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8374 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8375 if not os.path.isabs(src_path):
8376 self.op.src_path = src_path = \
8377 utils.PathJoin(constants.EXPORT_DIR, src_path)
8379 def _RunAllocator(self):
8380 """Run the allocator based on input opcode.
8383 nics = [n.ToDict() for n in self.nics]
8384 ial = IAllocator(self.cfg, self.rpc,
8385 mode=constants.IALLOCATOR_MODE_ALLOC,
8386 name=self.op.instance_name,
8387 disk_template=self.op.disk_template,
8390 vcpus=self.be_full[constants.BE_VCPUS],
8391 memory=self.be_full[constants.BE_MEMORY],
8394 hypervisor=self.op.hypervisor,
8397 ial.Run(self.op.iallocator)
8400 raise errors.OpPrereqError("Can't compute nodes using"
8401 " iallocator '%s': %s" %
8402 (self.op.iallocator, ial.info),
8404 if len(ial.result) != ial.required_nodes:
8405 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8406 " of nodes (%s), required %s" %
8407 (self.op.iallocator, len(ial.result),
8408 ial.required_nodes), errors.ECODE_FAULT)
8409 self.op.pnode = ial.result[0]
8410 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8411 self.op.instance_name, self.op.iallocator,
8412 utils.CommaJoin(ial.result))
8413 if ial.required_nodes == 2:
8414 self.op.snode = ial.result[1]
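# Illustrative allocator outcome (hypothetical node names): for a DRBD8
# instance ial.required_nodes is 2 and ial.result could be
# ["node1.example.com", "node2.example.com"], making the first the primary
# and the second the secondary node.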
8416 def BuildHooksEnv(self):
8419 This runs on master, primary and secondary nodes of the instance.
8423 "ADD_MODE": self.op.mode,
8425 if self.op.mode == constants.INSTANCE_IMPORT:
8426 env["SRC_NODE"] = self.op.src_node
8427 env["SRC_PATH"] = self.op.src_path
8428 env["SRC_IMAGES"] = self.src_images
8430 env.update(_BuildInstanceHookEnv(
8431 name=self.op.instance_name,
8432 primary_node=self.op.pnode,
8433 secondary_nodes=self.secondaries,
8434 status=self.op.start,
8435 os_type=self.op.os_type,
8436 memory=self.be_full[constants.BE_MEMORY],
8437 vcpus=self.be_full[constants.BE_VCPUS],
8438 nics=_NICListToTuple(self, self.nics),
8439 disk_template=self.op.disk_template,
8440 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8441 for d in self.disks],
8444 hypervisor_name=self.op.hypervisor,
8450 def BuildHooksNodes(self):
8451 """Build hooks nodes.
8454 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8457 def _ReadExportInfo(self):
8458 """Reads the export information from disk.
8460 It will override the opcode source node and path with the actual
8461 information, if these two were not specified before.
8463 @return: the export information
8466 assert self.op.mode == constants.INSTANCE_IMPORT
8468 src_node = self.op.src_node
8469 src_path = self.op.src_path
8471 if src_node is None:
8472 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8473 exp_list = self.rpc.call_export_list(locked_nodes)
8475 for node in exp_list:
8476 if exp_list[node].fail_msg:
8478 if src_path in exp_list[node].payload:
8480 self.op.src_node = src_node = node
8481 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8485 raise errors.OpPrereqError("No export found for relative path %s" %
8486 src_path, errors.ECODE_INVAL)
8488 _CheckNodeOnline(self, src_node)
8489 result = self.rpc.call_export_info(src_node, src_path)
8490 result.Raise("No export or invalid export found in dir %s" % src_path)
8492 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8493 if not export_info.has_section(constants.INISECT_EXP):
8494 raise errors.ProgrammerError("Corrupted export config",
8495 errors.ECODE_ENVIRON)
8497 ei_version = export_info.get(constants.INISECT_EXP, "version")
8498 if (int(ei_version) != constants.EXPORT_VERSION):
8499 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8500 (ei_version, constants.EXPORT_VERSION),
8501 errors.ECODE_ENVIRON)
8504 def _ReadExportParams(self, einfo):
8505 """Use export parameters as defaults.
8507 In case the opcode doesn't specify (as in override) some instance
8508 parameters, then try to use them from the export information, if
8512 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8514 if self.op.disk_template is None:
8515 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8516 self.op.disk_template = einfo.get(constants.INISECT_INS,
8518 if self.op.disk_template not in constants.DISK_TEMPLATES:
8519 raise errors.OpPrereqError("Disk template specified in configuration"
8520 " file is not one of the allowed values:"
8521 " %s" % " ".join(constants.DISK_TEMPLATES))
8523 raise errors.OpPrereqError("No disk template specified and the export"
8524 " is missing the disk_template information",
8527 if not self.op.disks:
8529 # TODO: import the disk iv_name too
8530 for idx in range(constants.MAX_DISKS):
8531 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8532 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8533 disks.append({constants.IDISK_SIZE: disk_sz})
8534 self.op.disks = disks
8535 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8536 raise errors.OpPrereqError("No disk info specified and the export"
8537 " is missing the disk information",
8540 if not self.op.nics:
8542 for idx in range(constants.MAX_NICS):
8543 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8545 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8546 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8553 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8554 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8556 if (self.op.hypervisor is None and
8557 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8558 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8560 if einfo.has_section(constants.INISECT_HYP):
8561 # use the export parameters but do not override the ones
8562 # specified by the user
8563 for name, value in einfo.items(constants.INISECT_HYP):
8564 if name not in self.op.hvparams:
8565 self.op.hvparams[name] = value
8567 if einfo.has_section(constants.INISECT_BEP):
8568 # use the parameters, without overriding
8569 for name, value in einfo.items(constants.INISECT_BEP):
8570 if name not in self.op.beparams:
8571 self.op.beparams[name] = value
8573 # try to read the parameters old style, from the main section
8574 for name in constants.BES_PARAMETERS:
8575 if (name not in self.op.beparams and
8576 einfo.has_option(constants.INISECT_INS, name)):
8577 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8579 if einfo.has_section(constants.INISECT_OSP):
8580 # use the parameters, without overriding
8581 for name, value in einfo.items(constants.INISECT_OSP):
8582 if name not in self.op.osparams:
8583 self.op.osparams[name] = value
8585 def _RevertToDefaults(self, cluster):
8586 """Revert the instance parameters to the default values.
8590 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8591 for name in self.op.hvparams.keys():
8592 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8593 del self.op.hvparams[name]
8595 be_defs = cluster.SimpleFillBE({})
8596 for name in self.op.beparams.keys():
8597 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8598 del self.op.beparams[name]
8600 nic_defs = cluster.SimpleFillNIC({})
8601 for nic in self.op.nics:
8602 for name in constants.NICS_PARAMETERS:
8603 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8606 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8607 for name in self.op.osparams.keys():
8608 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8609 del self.op.osparams[name]
8611 def _CalculateFileStorageDir(self):
8612 """Calculate final instance file storage dir.
8615 # file storage dir calculation/check
8616 self.instance_file_storage_dir = None
8617 if self.op.disk_template in constants.DTS_FILEBASED:
8618 # build the full file storage dir path
8621 if self.op.disk_template == constants.DT_SHARED_FILE:
8622 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8624 get_fsd_fn = self.cfg.GetFileStorageDir
8626 cfg_storagedir = get_fsd_fn()
8627 if not cfg_storagedir:
8628 raise errors.OpPrereqError("Cluster file storage dir not defined")
8629 joinargs.append(cfg_storagedir)
8631 if self.op.file_storage_dir is not None:
8632 joinargs.append(self.op.file_storage_dir)
8634 joinargs.append(self.op.instance_name)
8636 # pylint: disable=W0142
8637 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8639 def CheckPrereq(self):
8640 """Check prerequisites.
8643 self._CalculateFileStorageDir()
8645 if self.op.mode == constants.INSTANCE_IMPORT:
8646 export_info = self._ReadExportInfo()
8647 self._ReadExportParams(export_info)
8649 if (not self.cfg.GetVGName() and
8650 self.op.disk_template not in constants.DTS_NOT_LVM):
8651 raise errors.OpPrereqError("Cluster does not support lvm-based"
8652 " instances", errors.ECODE_STATE)
8654 if (self.op.hypervisor is None or
8655 self.op.hypervisor == constants.VALUE_AUTO):
8656 self.op.hypervisor = self.cfg.GetHypervisorType()
8658 cluster = self.cfg.GetClusterInfo()
8659 enabled_hvs = cluster.enabled_hypervisors
8660 if self.op.hypervisor not in enabled_hvs:
8661 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8662 " cluster (%s)" % (self.op.hypervisor,
8663 ",".join(enabled_hvs)),
8666 # Check tag validity
8667 for tag in self.op.tags:
8668 objects.TaggableObject.ValidateTag(tag)
8670 # check hypervisor parameter syntax (locally)
8671 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8672 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8674 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8675 hv_type.CheckParameterSyntax(filled_hvp)
8676 self.hv_full = filled_hvp
8677 # check that we don't specify global parameters on an instance
8678 _CheckGlobalHvParams(self.op.hvparams)
8680 # fill and remember the beparams dict
8681 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8682 for param, value in self.op.beparams.iteritems():
8683 if value == constants.VALUE_AUTO:
8684 self.op.beparams[param] = default_beparams[param]
8685 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8686 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8688 # build os parameters
8689 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8691 # now that hvp/bep are in final format, let's reset to defaults,
8693 if self.op.identify_defaults:
8694 self._RevertToDefaults(cluster)
8698 for idx, nic in enumerate(self.op.nics):
8699 nic_mode_req = nic.get(constants.INIC_MODE, None)
8700 nic_mode = nic_mode_req
8701 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8702 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8704 # in routed mode, for the first nic, the default ip is 'auto'
8705 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8706 default_ip_mode = constants.VALUE_AUTO
8708 default_ip_mode = constants.VALUE_NONE
8710 # ip validity checks
8711 ip = nic.get(constants.INIC_IP, default_ip_mode)
8712 if ip is None or ip.lower() == constants.VALUE_NONE:
8714 elif ip.lower() == constants.VALUE_AUTO:
8715 if not self.op.name_check:
8716 raise errors.OpPrereqError("IP address set to auto but name checks"
8717 " have been skipped",
8719 nic_ip = self.hostname1.ip
8721 if not netutils.IPAddress.IsValid(ip):
8722 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8726 # TODO: check the ip address for uniqueness
8727 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8728 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8731 # MAC address verification
8732 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8733 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8734 mac = utils.NormalizeAndValidateMac(mac)
8737 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8738 except errors.ReservationError:
8739 raise errors.OpPrereqError("MAC address %s already in use"
8740 " in cluster" % mac,
8741 errors.ECODE_NOTUNIQUE)
8743 # Build nic parameters
8744 link = nic.get(constants.INIC_LINK, None)
8745 if link == constants.VALUE_AUTO:
8746 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8749 nicparams[constants.NIC_MODE] = nic_mode
8751 nicparams[constants.NIC_LINK] = link
8753 check_params = cluster.SimpleFillNIC(nicparams)
8754 objects.NIC.CheckParameterSyntax(check_params)
8755 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8757 # disk checks/pre-build
8758 default_vg = self.cfg.GetVGName()
8760 for disk in self.op.disks:
8761 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8762 if mode not in constants.DISK_ACCESS_SET:
8763 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8764 mode, errors.ECODE_INVAL)
8765 size = disk.get(constants.IDISK_SIZE, None)
8767 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8770 except (TypeError, ValueError):
8771 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8774 data_vg = disk.get(constants.IDISK_VG, default_vg)
8776 constants.IDISK_SIZE: size,
8777 constants.IDISK_MODE: mode,
8778 constants.IDISK_VG: data_vg,
8779 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8781 if constants.IDISK_ADOPT in disk:
8782 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8783 self.disks.append(new_disk)
8785 if self.op.mode == constants.INSTANCE_IMPORT:
8787 for idx in range(len(self.disks)):
8788 option = "disk%d_dump" % idx
8789 if export_info.has_option(constants.INISECT_INS, option):
8790 # FIXME: are the old os-es, disk sizes, etc. useful?
8791 export_name = export_info.get(constants.INISECT_INS, option)
8792 image = utils.PathJoin(self.op.src_path, export_name)
8793 disk_images.append(image)
8795 disk_images.append(False)
8797 self.src_images = disk_images
8799 old_name = export_info.get(constants.INISECT_INS, "name")
8800 if self.op.instance_name == old_name:
8801 for idx, nic in enumerate(self.nics):
8802 if nic.mac == constants.VALUE_AUTO:
8803 nic_mac_ini = "nic%d_mac" % idx
8804 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8806 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8808 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8809 if self.op.ip_check:
8810 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8811 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8812 (self.check_ip, self.op.instance_name),
8813 errors.ECODE_NOTUNIQUE)
8815 #### mac address generation
8816 # By generating the mac address here, both the allocator and the hooks get
8817 # the real final mac address rather than the 'auto' or 'generate' value.
8818 # There is a race condition between the generation and the instance object
8819 # creation, which means that we know the mac is valid now, but we're not
8820 # sure it will be when we actually add the instance. If things go bad
8821 # adding the instance will abort because of a duplicate mac, and the
8822 # creation job will fail.
8823 for nic in self.nics:
8824 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8825 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8829 if self.op.iallocator is not None:
8830 self._RunAllocator()
8832 #### node related checks
8834 # check primary node
8835 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8836 assert self.pnode is not None, \
8837 "Cannot retrieve locked node %s" % self.op.pnode
8839 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8840 pnode.name, errors.ECODE_STATE)
8842 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8843 pnode.name, errors.ECODE_STATE)
8844 if not pnode.vm_capable:
8845 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8846 " '%s'" % pnode.name, errors.ECODE_STATE)
8848 self.secondaries = []
8850 # mirror node verification
8851 if self.op.disk_template in constants.DTS_INT_MIRROR:
8852 if self.op.snode == pnode.name:
8853 raise errors.OpPrereqError("The secondary node cannot be the"
8854 " primary node", errors.ECODE_INVAL)
8855 _CheckNodeOnline(self, self.op.snode)
8856 _CheckNodeNotDrained(self, self.op.snode)
8857 _CheckNodeVmCapable(self, self.op.snode)
8858 self.secondaries.append(self.op.snode)
8860 nodenames = [pnode.name] + self.secondaries
8862 if not self.adopt_disks:
8863 # Check lv size requirements, if not adopting
8864 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8865 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8867 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8868 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8869 disk[constants.IDISK_ADOPT])
8870 for disk in self.disks])
8871 if len(all_lvs) != len(self.disks):
8872 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8874 for lv_name in all_lvs:
8876 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8877 # to ReserveLV use the same syntax
8878 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8879 except errors.ReservationError:
8880 raise errors.OpPrereqError("LV named %s used by another instance" %
8881 lv_name, errors.ECODE_NOTUNIQUE)
8883 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8884 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8886 node_lvs = self.rpc.call_lv_list([pnode.name],
8887 vg_names.payload.keys())[pnode.name]
8888 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8889 node_lvs = node_lvs.payload
8891 delta = all_lvs.difference(node_lvs.keys())
8893 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8894 utils.CommaJoin(delta),
8896 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8898 raise errors.OpPrereqError("Online logical volumes found, cannot"
8899 " adopt: %s" % utils.CommaJoin(online_lvs),
8901 # update the size of disk based on what is found
8902 for dsk in self.disks:
8903 dsk[constants.IDISK_SIZE] = \
8904 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8905 dsk[constants.IDISK_ADOPT])][0]))
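# The lv_list payload maps "vg/lv" names to tuples whose index 0 is the size
# in MiB (adopted here) and index 2 the online/in-use flag (checked above);
# the remaining fields are not consulted by this code.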
8907 elif self.op.disk_template == constants.DT_BLOCK:
8908 # Normalize and de-duplicate device paths
8909 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8910 for disk in self.disks])
8911 if len(all_disks) != len(self.disks):
8912 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8914 baddisks = [d for d in all_disks
8915 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8917 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8918 " cannot be adopted" %
8919 (", ".join(baddisks),
8920 constants.ADOPTABLE_BLOCKDEV_ROOT),
8923 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8924 list(all_disks))[pnode.name]
8925 node_disks.Raise("Cannot get block device information from node %s" %
8927 node_disks = node_disks.payload
8928 delta = all_disks.difference(node_disks.keys())
8930 raise errors.OpPrereqError("Missing block device(s): %s" %
8931 utils.CommaJoin(delta),
8933 for dsk in self.disks:
8934 dsk[constants.IDISK_SIZE] = \
8935 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8937 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8939 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8940 # check OS parameters (remotely)
8941 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8943 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8945 # memory check on primary node
8947 _CheckNodeFreeMemory(self, self.pnode.name,
8948 "creating instance %s" % self.op.instance_name,
8949 self.be_full[constants.BE_MEMORY],
8952 self.dry_run_result = list(nodenames)
8954 def Exec(self, feedback_fn):
8955 """Create and add the instance to the cluster.
8958 instance = self.op.instance_name
8959 pnode_name = self.pnode.name
8961 ht_kind = self.op.hypervisor
8962 if ht_kind in constants.HTS_REQ_PORT:
8963 network_port = self.cfg.AllocatePort()
8967 disks = _GenerateDiskTemplate(self,
8968 self.op.disk_template,
8969 instance, pnode_name,
8972 self.instance_file_storage_dir,
8973 self.op.file_driver,
8977 iobj = objects.Instance(name=instance, os=self.op.os_type,
8978 primary_node=pnode_name,
8979 nics=self.nics, disks=disks,
8980 disk_template=self.op.disk_template,
8982 network_port=network_port,
8983 beparams=self.op.beparams,
8984 hvparams=self.op.hvparams,
8985 hypervisor=self.op.hypervisor,
8986 osparams=self.op.osparams,
8990 for tag in self.op.tags:
8993 if self.adopt_disks:
8994 if self.op.disk_template == constants.DT_PLAIN:
8995 # rename LVs to the newly-generated names; we need to construct
8996 # 'fake' LV disks with the old data, plus the new unique_id
8997 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8999 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9000 rename_to.append(t_dsk.logical_id)
9001 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9002 self.cfg.SetDiskID(t_dsk, pnode_name)
9003 result = self.rpc.call_blockdev_rename(pnode_name,
9004 zip(tmp_disks, rename_to))
9005 result.Raise("Failed to rename adopted LVs")
9007 feedback_fn("* creating instance disks...")
9009 _CreateDisks(self, iobj)
9010 except errors.OpExecError:
9011 self.LogWarning("Device creation failed, reverting...")
9013 _RemoveDisks(self, iobj)
9015 self.cfg.ReleaseDRBDMinors(instance)
9018 feedback_fn("adding instance %s to cluster config" % instance)
9020 self.cfg.AddInstance(iobj, self.proc.GetECId())
9022 # Declare that we don't want to remove the instance lock anymore, as we've
9023 # added the instance to the config
9024 del self.remove_locks[locking.LEVEL_INSTANCE]
9026 if self.op.mode == constants.INSTANCE_IMPORT:
9027 # Release unused nodes
9028 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9031 _ReleaseLocks(self, locking.LEVEL_NODE)
9034 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9035 feedback_fn("* wiping instance disks...")
9037 _WipeDisks(self, iobj)
9038 except errors.OpExecError, err:
9039 logging.exception("Wiping disks failed")
9040 self.LogWarning("Wiping instance disks failed (%s)", err)
9044 # Something is already wrong with the disks, don't do anything else
9046 elif self.op.wait_for_sync:
9047 disk_abort = not _WaitForSync(self, iobj)
9048 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9049 # make sure the disks are not degraded (still sync-ing is ok)
9050 feedback_fn("* checking mirrors status")
9051 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9056 _RemoveDisks(self, iobj)
9057 self.cfg.RemoveInstance(iobj.name)
9058 # Make sure the instance lock gets removed
9059 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9060 raise errors.OpExecError("There are some degraded disks for"
9063 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9064 if self.op.mode == constants.INSTANCE_CREATE:
9065 if not self.op.no_install:
9066 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9067 not self.op.wait_for_sync)
9069 feedback_fn("* pausing disk sync to install instance OS")
9070 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9072 for idx, success in enumerate(result.payload):
9074 logging.warn("pause-sync of instance %s for disk %d failed",
9077 feedback_fn("* running the instance OS create scripts...")
9078 # FIXME: pass debug option from opcode to backend
9080 self.rpc.call_instance_os_add(pnode_name, iobj, False,
9081 self.op.debug_level)
9083 feedback_fn("* resuming disk sync")
9084 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9086 for idx, success in enumerate(result.payload):
9088 logging.warn("resume-sync of instance %s for disk %d failed",
9091 os_add_result.Raise("Could not add os for instance %s"
9092 " on node %s" % (instance, pnode_name))
9094 elif self.op.mode == constants.INSTANCE_IMPORT:
9095 feedback_fn("* running the instance OS import scripts...")
9099 for idx, image in enumerate(self.src_images):
9103 # FIXME: pass debug option from opcode to backend
9104 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9105 constants.IEIO_FILE, (image, ),
9106 constants.IEIO_SCRIPT,
9107 (iobj.disks[idx], idx),
9109 transfers.append(dt)
9112 masterd.instance.TransferInstanceData(self, feedback_fn,
9113 self.op.src_node, pnode_name,
9114 self.pnode.secondary_ip,
9116 if not compat.all(import_result):
9117 self.LogWarning("Some disks for instance %s on node %s were not"
9118 " imported successfully" % (instance, pnode_name))
9120 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9121 feedback_fn("* preparing remote import...")
9122 # The source cluster will stop the instance before attempting to make a
9123 # connection. In some cases stopping an instance can take a long time,
9124 # hence the shutdown timeout is added to the connection timeout.
9125 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9126 self.op.source_shutdown_timeout)
9127 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
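# Illustrative timeout arithmetic (RIE_CONNECT_TIMEOUT's value is assumed
# here, not quoted): if it were 60 s and source_shutdown_timeout were 120 s,
# the remote import would wait up to 180 s for the source cluster to connect.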
9129 assert iobj.primary_node == self.pnode.name
9131 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9132 self.source_x509_ca,
9133 self._cds, timeouts)
9134 if not compat.all(disk_results):
9135 # TODO: Should the instance still be started, even if some disks
9136 # failed to import (valid for local imports, too)?
9137 self.LogWarning("Some disks for instance %s on node %s were not"
9138 " imported successfully" % (instance, pnode_name))
9140 # Run rename script on newly imported instance
9141 assert iobj.name == instance
9142 feedback_fn("Running rename script for %s" % instance)
9143 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9144 self.source_instance_name,
9145 self.op.debug_level)
9147 self.LogWarning("Failed to run rename script for %s on node"
9148 " %s: %s" % (instance, pnode_name, result.fail_msg))
9151 # also checked in the prereq part
9152 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9156 iobj.admin_up = True
9157 self.cfg.Update(iobj, feedback_fn)
9158 logging.info("Starting instance %s on node %s", instance, pnode_name)
9159 feedback_fn("* starting instance...")
9160 result = self.rpc.call_instance_start(pnode_name, iobj,
9162 result.Raise("Could not start instance")
9164 return list(iobj.all_nodes)
9167 class LUInstanceConsole(NoHooksLU):
9168 """Connect to an instance's console.
9170 This is somewhat special in that it returns the command line that
9171 you need to run on the master node in order to connect to the
9177 def ExpandNames(self):
9178 self._ExpandAndLockInstance()
9180 def CheckPrereq(self):
9181 """Check prerequisites.
9183 This checks that the instance is in the cluster.
9186 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9187 assert self.instance is not None, \
9188 "Cannot retrieve locked instance %s" % self.op.instance_name
9189 _CheckNodeOnline(self, self.instance.primary_node)
9191 def Exec(self, feedback_fn):
9192 """Connect to the console of an instance
9195 instance = self.instance
9196 node = instance.primary_node
9198 node_insts = self.rpc.call_instance_list([node],
9199 [instance.hypervisor])[node]
9200 node_insts.Raise("Can't get node information from %s" % node)
9202 if instance.name not in node_insts.payload:
9203 if instance.admin_up:
9204 state = constants.INSTST_ERRORDOWN
9206 state = constants.INSTST_ADMINDOWN
9207 raise errors.OpExecError("Instance %s is not running (state %s)" %
9208 (instance.name, state))
9210 logging.debug("Connecting to console of %s on %s", instance.name, node)
9212 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9215 def _GetInstanceConsole(cluster, instance):
9216 """Returns console information for an instance.
9218 @type cluster: L{objects.Cluster}
9219 @type instance: L{objects.Instance}
9223 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9224 # beparams and hvparams are passed separately, to avoid editing the
9225 # instance and then saving the defaults in the instance itself.
9226 hvparams = cluster.FillHV(instance)
9227 beparams = cluster.FillBE(instance)
9228 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9230 assert console.instance == instance.name
9231 assert console.Validate()
9233 return console.ToDict()
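# Illustrative use (hypothetical caller): the returned dict is the serialized
# objects.InstanceConsole; a client could rebuild it with
# objects.InstanceConsole.FromDict(...) and then, depending on console.kind,
# either run the contained command or connect to the advertised host/port.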
9236 class LUInstanceReplaceDisks(LogicalUnit):
9237 """Replace the disks of an instance.
9240 HPATH = "mirrors-replace"
9241 HTYPE = constants.HTYPE_INSTANCE
9244 def CheckArguments(self):
9245 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9248 def ExpandNames(self):
9249 self._ExpandAndLockInstance()
9251 assert locking.LEVEL_NODE not in self.needed_locks
9252 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9254 assert self.op.iallocator is None or self.op.remote_node is None, \
9255 "Conflicting options"
9257 if self.op.remote_node is not None:
9258 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9260 # Warning: do not remove the locking of the new secondary here
9261 # unless DRBD8.AddChildren is changed to work in parallel;
9262 # currently it doesn't since parallel invocations of
9263 # FindUnusedMinor will conflict
9264 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9265 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9267 self.needed_locks[locking.LEVEL_NODE] = []
9268 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9270 if self.op.iallocator is not None:
9271 # iallocator will select a new node in the same group
9272 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9274 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9275 self.op.iallocator, self.op.remote_node,
9276 self.op.disks, False, self.op.early_release)
9278 self.tasklets = [self.replacer]
9280 def DeclareLocks(self, level):
9281 if level == locking.LEVEL_NODEGROUP:
9282 assert self.op.remote_node is None
9283 assert self.op.iallocator is not None
9284 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9286 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9287 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9288 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9290 elif level == locking.LEVEL_NODE:
9291 if self.op.iallocator is not None:
9292 assert self.op.remote_node is None
9293 assert not self.needed_locks[locking.LEVEL_NODE]
9295 # Lock member nodes of all locked groups
9296 self.needed_locks[locking.LEVEL_NODE] = [node_name
9297 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9298 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9300 self._LockInstancesNodes()
9302 def BuildHooksEnv(self):
9305 This runs on the master, the primary and all the secondaries.
9308 instance = self.replacer.instance
9310 "MODE": self.op.mode,
9311 "NEW_SECONDARY": self.op.remote_node,
9312 "OLD_SECONDARY": instance.secondary_nodes[0],
9314 env.update(_BuildInstanceHookEnvByObject(self, instance))
9317 def BuildHooksNodes(self):
9318 """Build hooks nodes.
9321 instance = self.replacer.instance
9323 self.cfg.GetMasterNode(),
9324 instance.primary_node,
9326 if self.op.remote_node is not None:
9327 nl.append(self.op.remote_node)
9330 def CheckPrereq(self):
9331 """Check prerequisites.
9334 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9335 self.op.iallocator is None)
9337 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9339 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9341 return LogicalUnit.CheckPrereq(self)
9344 class TLReplaceDisks(Tasklet):
9345 """Replaces disks for an instance.
9347 Note: Locking is not within the scope of this class.
9350 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9351 disks, delay_iallocator, early_release):
9352 """Initializes this class.
9355 Tasklet.__init__(self, lu)
9358 self.instance_name = instance_name
9360 self.iallocator_name = iallocator_name
9361 self.remote_node = remote_node
9363 self.delay_iallocator = delay_iallocator
9364 self.early_release = early_release
9367 self.instance = None
9368 self.new_node = None
9369 self.target_node = None
9370 self.other_node = None
9371 self.remote_node_info = None
9372 self.node_secondary_ip = None
9375 def CheckArguments(mode, remote_node, iallocator):
9376 """Helper function for users of this class.
9379 # check for valid parameter combination
9380 if mode == constants.REPLACE_DISK_CHG:
9381 if remote_node is None and iallocator is None:
9382 raise errors.OpPrereqError("When changing the secondary either an"
9383 " iallocator script must be used or the"
9384 " new node given", errors.ECODE_INVAL)
9386 if remote_node is not None and iallocator is not None:
9387 raise errors.OpPrereqError("Give either the iallocator or the new"
9388 " secondary, not both", errors.ECODE_INVAL)
9390 elif remote_node is not None or iallocator is not None:
9391 # Not replacing the secondary
9392 raise errors.OpPrereqError("The iallocator and new node options can"
9393 " only be used when changing the"
9394 " secondary node", errors.ECODE_INVAL)
9397 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9398 """Compute a new secondary node using an IAllocator.
9401 ial = IAllocator(lu.cfg, lu.rpc,
9402 mode=constants.IALLOCATOR_MODE_RELOC,
9404 relocate_from=list(relocate_from))
9406 ial.Run(iallocator_name)
9409 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9410 " %s" % (iallocator_name, ial.info),
9413 if len(ial.result) != ial.required_nodes:
9414 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9415 " of nodes (%s), required %s" %
9417 len(ial.result), ial.required_nodes),
9420 remote_node_name = ial.result[0]
9422 lu.LogInfo("Selected new secondary for instance '%s': %s",
9423 instance_name, remote_node_name)
9425 return remote_node_name
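# Sketch of the relocation result (hypothetical values): for an instance whose
# current secondary is "node2", ial.result is a single-element list such as
# ["node3"]; its first element becomes the new secondary logged above.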
9427 def _FindFaultyDisks(self, node_name):
9428 """Wrapper for L{_FindFaultyInstanceDisks}.
9431 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9434 def _CheckDisksActivated(self, instance):
9435 """Checks if the instance disks are activated.
9437 @param instance: The instance to check disks
9438 @return: True if they are activated, False otherwise
9441 nodes = instance.all_nodes
9443 for idx, dev in enumerate(instance.disks):
9445 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9446 self.cfg.SetDiskID(dev, node)
9448 result = self.rpc.call_blockdev_find(node, dev)
9452 elif result.fail_msg or not result.payload:
9457 def CheckPrereq(self):
9458 """Check prerequisites.
9460 This checks that the instance is in the cluster.
9463 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9464 assert instance is not None, \
9465 "Cannot retrieve locked instance %s" % self.instance_name
9467 if instance.disk_template != constants.DT_DRBD8:
9468 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9469 " instances", errors.ECODE_INVAL)
9471 if len(instance.secondary_nodes) != 1:
9472 raise errors.OpPrereqError("The instance has a strange layout,"
9473 " expected one secondary but found %d" %
9474 len(instance.secondary_nodes),
9477 if not self.delay_iallocator:
9478 self._CheckPrereq2()
9480 def _CheckPrereq2(self):
9481 """Check prerequisites, second part.
9483 This function should always be part of CheckPrereq. It was separated and is
9484 now called from Exec because during node evacuation iallocator was only
9485 called with an unmodified cluster model, not taking planned changes into account.
9489 instance = self.instance
9490 secondary_node = instance.secondary_nodes[0]
9492 if self.iallocator_name is None:
9493 remote_node = self.remote_node
9495 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9496 instance.name, instance.secondary_nodes)
9498 if remote_node is None:
9499 self.remote_node_info = None
9501 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9502 "Remote node '%s' is not locked" % remote_node
9504 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9505 assert self.remote_node_info is not None, \
9506 "Cannot retrieve locked node %s" % remote_node
9508 if remote_node == self.instance.primary_node:
9509 raise errors.OpPrereqError("The specified node is the primary node of"
9510 " the instance", errors.ECODE_INVAL)
9512 if remote_node == secondary_node:
9513 raise errors.OpPrereqError("The specified node is already the"
9514 " secondary node of the instance",
9517 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9518 constants.REPLACE_DISK_CHG):
9519 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9522 if self.mode == constants.REPLACE_DISK_AUTO:
9523 if not self._CheckDisksActivated(instance):
9524 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9525 " first" % self.instance_name,
9527 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9528 faulty_secondary = self._FindFaultyDisks(secondary_node)
9530 if faulty_primary and faulty_secondary:
9531 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9532 " one node and can not be repaired"
9533 " automatically" % self.instance_name,
9537 self.disks = faulty_primary
9538 self.target_node = instance.primary_node
9539 self.other_node = secondary_node
9540 check_nodes = [self.target_node, self.other_node]
9541 elif faulty_secondary:
9542 self.disks = faulty_secondary
9543 self.target_node = secondary_node
9544 self.other_node = instance.primary_node
9545 check_nodes = [self.target_node, self.other_node]
9551 # Non-automatic modes
9552 if self.mode == constants.REPLACE_DISK_PRI:
9553 self.target_node = instance.primary_node
9554 self.other_node = secondary_node
9555 check_nodes = [self.target_node, self.other_node]
9557 elif self.mode == constants.REPLACE_DISK_SEC:
9558 self.target_node = secondary_node
9559 self.other_node = instance.primary_node
9560 check_nodes = [self.target_node, self.other_node]
9562 elif self.mode == constants.REPLACE_DISK_CHG:
9563 self.new_node = remote_node
9564 self.other_node = instance.primary_node
9565 self.target_node = secondary_node
9566 check_nodes = [self.new_node, self.other_node]
9568 _CheckNodeNotDrained(self.lu, remote_node)
9569 _CheckNodeVmCapable(self.lu, remote_node)
9571 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9572 assert old_node_info is not None
9573 if old_node_info.offline and not self.early_release:
9574 # doesn't make sense to delay the release
9575 self.early_release = True
9576 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9577 " early-release mode", secondary_node)
9580 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9583 # If not specified all disks should be replaced
9585 self.disks = range(len(self.instance.disks))
9587 for node in check_nodes:
9588 _CheckNodeOnline(self.lu, node)
9590 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9593 if node_name is not None)
9595 # Release unneeded node locks
9596 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9598 # Release any owned node group
9599 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9600 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9602 # Check whether disks are valid
9603 for disk_idx in self.disks:
9604 instance.FindDisk(disk_idx)
9606 # Get secondary node IP addresses
9607 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9608 in self.cfg.GetMultiNodeInfo(touched_nodes))
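# node_secondary_ip ends up as a simple name -> secondary-IP mapping used by
# the DRBD network calls later on, e.g. (hypothetical addresses):
#   {"node1": "192.0.2.1", "node3": "192.0.2.3"}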
9610 def Exec(self, feedback_fn):
9611 """Execute disk replacement.
9613 This dispatches the disk replacement to the appropriate handler.
9616 if self.delay_iallocator:
9617 self._CheckPrereq2()
9620 # Verify owned locks before starting operation
9621 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9622 assert set(owned_nodes) == set(self.node_secondary_ip), \
9623 ("Incorrect node locks, owning %s, expected %s" %
9624 (owned_nodes, self.node_secondary_ip.keys()))
9626 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9627 assert list(owned_instances) == [self.instance_name], \
9628 "Instance '%s' not locked" % self.instance_name
9630 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9631 "Should not own any node group lock at this point"
9634 feedback_fn("No disks need replacement")
9637 feedback_fn("Replacing disk(s) %s for %s" %
9638 (utils.CommaJoin(self.disks), self.instance.name))
9640 activate_disks = (not self.instance.admin_up)
9642 # Activate the instance disks if we're replacing them on a down instance
9644 _StartInstanceDisks(self.lu, self.instance, True)
9647 # Should we replace the secondary node?
9648 if self.new_node is not None:
9649 fn = self._ExecDrbd8Secondary
9651 fn = self._ExecDrbd8DiskOnly
9653 result = fn(feedback_fn)
9655 # Deactivate the instance disks if we're replacing them on a down instance
9658 _SafeShutdownInstanceDisks(self.lu, self.instance)
9661 # Verify owned locks
9662 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9663 nodes = frozenset(self.node_secondary_ip)
9664 assert ((self.early_release and not owned_nodes) or
9665 (not self.early_release and not (set(owned_nodes) - nodes))), \
9666 ("Not owning the correct locks, early_release=%s, owned=%r,"
9667 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9671 def _CheckVolumeGroup(self, nodes):
9672 self.lu.LogInfo("Checking volume groups")
9674 vgname = self.cfg.GetVGName()
9676 # Make sure volume group exists on all involved nodes
9677 results = self.rpc.call_vg_list(nodes)
9679 raise errors.OpExecError("Can't list volume groups on the nodes")
9683 res.Raise("Error checking node %s" % node)
9684 if vgname not in res.payload:
9685 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9688 def _CheckDisksExistence(self, nodes):
9689 # Check disk existence
9690 for idx, dev in enumerate(self.instance.disks):
9691 if idx not in self.disks:
9695 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9696 self.cfg.SetDiskID(dev, node)
9698 result = self.rpc.call_blockdev_find(node, dev)
9700 msg = result.fail_msg
9701 if msg or not result.payload:
9703 msg = "disk not found"
9704 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9707 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9708 for idx, dev in enumerate(self.instance.disks):
9709 if idx not in self.disks:
9712 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9715 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9717 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9718 " replace disks for instance %s" %
9719 (node_name, self.instance.name))
9721 def _CreateNewStorage(self, node_name):
9722 """Create new storage on the primary or secondary node.
9724 This is only used for same-node replaces, not for changing the
9725 secondary node, hence we don't want to modify the existing disk.
9730 for idx, dev in enumerate(self.instance.disks):
9731 if idx not in self.disks:
9734 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9736 self.cfg.SetDiskID(dev, node_name)
9738 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9739 names = _GenerateUniqueNames(self.lu, lv_names)
9741 vg_data = dev.children[0].logical_id[0]
9742 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9743 logical_id=(vg_data, names[0]))
9744 vg_meta = dev.children[1].logical_id[0]
9745 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9746 logical_id=(vg_meta, names[1]))
9748 new_lvs = [lv_data, lv_meta]
9749 old_lvs = [child.Copy() for child in dev.children]
9750 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9752 # we pass force_create=True to force the LVM creation
9753 for new_lv in new_lvs:
9754 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9755 _GetInstanceInfoText(self.instance), False)
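# The mapping built above is consumed by the later steps; per replaced disk it
# roughly looks like (sketch):
#   iv_names["disk/0"] == (drbd_dev, [old_data_lv, old_meta_lv],
#                          [new_data_lv, new_meta_lv])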
9759 def _CheckDevices(self, node_name, iv_names):
9760 for name, (dev, _, _) in iv_names.iteritems():
9761 self.cfg.SetDiskID(dev, node_name)
9763 result = self.rpc.call_blockdev_find(node_name, dev)
9765 msg = result.fail_msg
9766 if msg or not result.payload:
9768 msg = "disk not found"
9769 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9772 if result.payload.is_degraded:
9773 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9775 def _RemoveOldStorage(self, node_name, iv_names):
9776 for name, (_, old_lvs, _) in iv_names.iteritems():
9777 self.lu.LogInfo("Remove logical volumes for %s" % name)
9780 self.cfg.SetDiskID(lv, node_name)
9782 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9784 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9785 hint="remove unused LVs manually")
9787 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9788 """Replace a disk on the primary or secondary for DRBD 8.
9790 The algorithm for replace is quite complicated:
9792 1. for each disk to be replaced:
9794 1. create new LVs on the target node with unique names
9795 1. detach old LVs from the drbd device
9796 1. rename old LVs to name_replaced.<time_t>
9797 1. rename new LVs to old LVs
9798 1. attach the new LVs (with the old names now) to the drbd device
9800 1. wait for sync across all devices
9802 1. for each modified disk:
9804 1. remove old LVs (which have the name name_replaced.<time_t>)
9806 Failures are not very well handled.
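In short, the per-disk LV juggling performed below is (sketch, hypothetical
names): detach [old_data, old_meta] from the drbd device; rename old_data to
old_data_replaced-<time_t> (same for the meta LV); rename new_data to old_data
(same for the meta LV); finally re-attach the LVs, now backed by the freshly
created storage.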
9811 # Step: check device activation
9812 self.lu.LogStep(1, steps_total, "Check device existence")
9813 self._CheckDisksExistence([self.other_node, self.target_node])
9814 self._CheckVolumeGroup([self.target_node, self.other_node])
9816 # Step: check other node consistency
9817 self.lu.LogStep(2, steps_total, "Check peer consistency")
9818 self._CheckDisksConsistency(self.other_node,
9819 self.other_node == self.instance.primary_node,
9822 # Step: create new storage
9823 self.lu.LogStep(3, steps_total, "Allocate new storage")
9824 iv_names = self._CreateNewStorage(self.target_node)
9826 # Step: for each lv, detach+rename*2+attach
9827 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9828 for dev, old_lvs, new_lvs in iv_names.itervalues():
9829 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9831 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9833 result.Raise("Can't detach drbd from local storage on node"
9834 " %s for device %s" % (self.target_node, dev.iv_name))
9836 #cfg.Update(instance)
9838 # ok, we created the new LVs, so now we know we have the needed
9839 # storage; as such, we proceed on the target node to rename
9840 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9841 # using the assumption that logical_id == physical_id (which in
9842 # turn is the unique_id on that node)
9844 # FIXME(iustin): use a better name for the replaced LVs
9845 temp_suffix = int(time.time())
9846 ren_fn = lambda d, suff: (d.physical_id[0],
9847 d.physical_id[1] + "_replaced-%s" % suff)
9849 # Build the rename list based on what LVs exist on the node
9850 rename_old_to_new = []
9851 for to_ren in old_lvs:
9852 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9853 if not result.fail_msg and result.payload:
9855 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9857 self.lu.LogInfo("Renaming the old LVs on the target node")
9858 result = self.rpc.call_blockdev_rename(self.target_node,
9860 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9862 # Now we rename the new LVs to the old LVs
9863 self.lu.LogInfo("Renaming the new LVs on the target node")
9864 rename_new_to_old = [(new, old.physical_id)
9865 for old, new in zip(old_lvs, new_lvs)]
9866 result = self.rpc.call_blockdev_rename(self.target_node,
9868 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9870 # Intermediate steps of in memory modifications
9871 for old, new in zip(old_lvs, new_lvs):
9872 new.logical_id = old.logical_id
9873 self.cfg.SetDiskID(new, self.target_node)
9875 # We need to modify old_lvs so that removal later removes the
9876 # right LVs, not the newly added ones; note that old_lvs is a copy here
9878 for disk in old_lvs:
9879 disk.logical_id = ren_fn(disk, temp_suffix)
9880 self.cfg.SetDiskID(disk, self.target_node)
9882 # Now that the new lvs have the old name, we can add them to the device
9883 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9884 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9886 msg = result.fail_msg
9888 for new_lv in new_lvs:
9889 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9892 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9893 hint=("cleanup manually the unused logical"
9895 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9898 if self.early_release:
9899 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9901 self._RemoveOldStorage(self.target_node, iv_names)
9902 # WARNING: we release both node locks here, do not do other RPCs
9903 # than WaitForSync to the primary node
9904 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9905 names=[self.target_node, self.other_node])
9908 # This can fail as the old devices are degraded and _WaitForSync
9909 # does a combined result over all disks, so we don't check its return value
9910 self.lu.LogStep(cstep, steps_total, "Sync devices")
9912 _WaitForSync(self.lu, self.instance)
9914 # Check all devices manually
9915 self._CheckDevices(self.instance.primary_node, iv_names)
9917 # Step: remove old storage
9918 if not self.early_release:
9919 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9921 self._RemoveOldStorage(self.target_node, iv_names)
9923 def _ExecDrbd8Secondary(self, feedback_fn):
9924 """Replace the secondary node for DRBD 8.
9926 The algorithm for replace is quite complicated:
9927 - for all disks of the instance:
9928 - create new LVs on the new node with same names
9929 - shutdown the drbd device on the old secondary
9930 - disconnect the drbd network on the primary
9931 - create the drbd device on the new secondary
9932 - network attach the drbd on the primary, using an artifice:
9933 the drbd code for Attach() will connect to the network if it
9934 finds a device which is connected to the good local disks but not network enabled
9936 - wait for sync across all devices
9937 - remove all disks from the old secondary
9939 Failures are not very well handled.
9944 pnode = self.instance.primary_node
9946 # Step: check device activation
9947 self.lu.LogStep(1, steps_total, "Check device existence")
9948 self._CheckDisksExistence([self.instance.primary_node])
9949 self._CheckVolumeGroup([self.instance.primary_node])
9951 # Step: check other node consistency
9952 self.lu.LogStep(2, steps_total, "Check peer consistency")
9953 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9955 # Step: create new storage
9956 self.lu.LogStep(3, steps_total, "Allocate new storage")
9957 for idx, dev in enumerate(self.instance.disks):
9958 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9959 (self.new_node, idx))
9960 # we pass force_create=True to force LVM creation
9961 for new_lv in dev.children:
9962 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9963 _GetInstanceInfoText(self.instance), False)
9965 # Step 4: drbd minors and drbd setup changes
9966 # after this, we must manually remove the drbd minors on both the
9967 # error and the success paths
9968 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9969 minors = self.cfg.AllocateDRBDMinor([self.new_node
9970 for dev in self.instance.disks],
9972 logging.debug("Allocated minors %r", minors)
9975 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9976 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9977 (self.new_node, idx))
9978 # create new devices on new_node; note that we create two IDs:
9979 # one without port, so the drbd will be activated without
9980 # networking information on the new node at this stage, and one
9981 # with network, for the latter activation in step 4
9982 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9983 if self.instance.primary_node == o_node1:
9986 assert self.instance.primary_node == o_node2, "Three-node instance?"
9989 new_alone_id = (self.instance.primary_node, self.new_node, None,
9990 p_minor, new_minor, o_secret)
9991 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9992 p_minor, new_minor, o_secret)
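# Sketch of the two IDs built above (hypothetical values), following the
# (node_a, node_b, port, minor_a, minor_b, secret) layout of a DRBD8
# logical_id:
#   new_alone_id = ("node1", "node3", None,  0, 5, "secret")  # no port yet
#   new_net_id   = ("node1", "node3", 11005, 0, 5, "secret")  # used later on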
9994 iv_names[idx] = (dev, dev.children, new_net_id)
9995 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9997 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9998 logical_id=new_alone_id,
9999 children=dev.children,
10002 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10003 _GetInstanceInfoText(self.instance), False)
10004 except errors.GenericError:
10005 self.cfg.ReleaseDRBDMinors(self.instance.name)
10008 # We have new devices, shutdown the drbd on the old secondary
10009 for idx, dev in enumerate(self.instance.disks):
10010 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10011 self.cfg.SetDiskID(dev, self.target_node)
10012 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10014 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10015 " node: %s" % (idx, msg),
10016 hint=("Please cleanup this device manually as"
10017 " soon as possible"))
10019 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10020 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10021 self.instance.disks)[pnode]
10023 msg = result.fail_msg
10025 # detaches didn't succeed (unlikely)
10026 self.cfg.ReleaseDRBDMinors(self.instance.name)
10027 raise errors.OpExecError("Can't detach the disks from the network on"
10028 " old node: %s" % (msg,))
10030 # if we managed to detach at least one, we update all the disks of
10031 # the instance to point to the new secondary
10032 self.lu.LogInfo("Updating instance configuration")
10033 for dev, _, new_logical_id in iv_names.itervalues():
10034 dev.logical_id = new_logical_id
10035 self.cfg.SetDiskID(dev, self.instance.primary_node)
10037 self.cfg.Update(self.instance, feedback_fn)
10039 # and now perform the drbd attach
10040 self.lu.LogInfo("Attaching primary drbds to new secondary"
10041 " (standalone => connected)")
10042 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10044 self.node_secondary_ip,
10045 self.instance.disks,
10046 self.instance.name,
10048 for to_node, to_result in result.items():
10049 msg = to_result.fail_msg
10051 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10053 hint=("please do a gnt-instance info to see the"
10054 " status of disks"))
10056 if self.early_release:
10057 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10059 self._RemoveOldStorage(self.target_node, iv_names)
10060 # WARNING: we release all node locks here, do not do other RPCs
10061 # than WaitForSync to the primary node
10062 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10063 names=[self.instance.primary_node,
10068 # This can fail as the old devices are degraded and _WaitForSync
10069 # does a combined result over all disks, so we don't check its return value
10070 self.lu.LogStep(cstep, steps_total, "Sync devices")
10072 _WaitForSync(self.lu, self.instance)
10074 # Check all devices manually
10075 self._CheckDevices(self.instance.primary_node, iv_names)
10077 # Step: remove old storage
10078 if not self.early_release:
10079 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10080 self._RemoveOldStorage(self.target_node, iv_names)
10083 class LURepairNodeStorage(NoHooksLU):
10084 """Repairs the volume group on a node.
10089 def CheckArguments(self):
10090 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10092 storage_type = self.op.storage_type
10094 if (constants.SO_FIX_CONSISTENCY not in
10095 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10096 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10097 " repaired" % storage_type,
10098 errors.ECODE_INVAL)
10100 def ExpandNames(self):
10101 self.needed_locks = {
10102 locking.LEVEL_NODE: [self.op.node_name],
10105 def _CheckFaultyDisks(self, instance, node_name):
10106 """Ensure faulty disks abort the opcode or at least warn."""
10108 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10110 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10111 " node '%s'" % (instance.name, node_name),
10112 errors.ECODE_STATE)
10113 except errors.OpPrereqError, err:
10114 if self.op.ignore_consistency:
10115 self.proc.LogWarning(str(err.args[0]))
10119 def CheckPrereq(self):
10120 """Check prerequisites.
10123 # Check whether any instance on this node has faulty disks
10124 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10125 if not inst.admin_up:
10127 check_nodes = set(inst.all_nodes)
10128 check_nodes.discard(self.op.node_name)
10129 for inst_node_name in check_nodes:
10130 self._CheckFaultyDisks(inst, inst_node_name)
10132 def Exec(self, feedback_fn):
10133 feedback_fn("Repairing storage unit '%s' on %s ..." %
10134 (self.op.name, self.op.node_name))
10136 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10137 result = self.rpc.call_storage_execute(self.op.node_name,
10138 self.op.storage_type, st_args,
10140 constants.SO_FIX_CONSISTENCY)
10141 result.Raise("Failed to repair storage unit '%s' on %s" %
10142 (self.op.name, self.op.node_name))
10145 class LUNodeEvacuate(NoHooksLU):
10146 """Evacuates instances off a list of nodes.
10151 def CheckArguments(self):
10152 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10154 def ExpandNames(self):
10155 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10157 if self.op.remote_node is not None:
10158 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10159 assert self.op.remote_node
10161 if self.op.remote_node == self.op.node_name:
10162 raise errors.OpPrereqError("Can not use evacuated node as a new"
10163 " secondary node", errors.ECODE_INVAL)
10165 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10166 raise errors.OpPrereqError("Without the use of an iallocator only"
10167 " secondary instances can be evacuated",
10168 errors.ECODE_INVAL)
10171 self.share_locks = _ShareAll()
10172 self.needed_locks = {
10173 locking.LEVEL_INSTANCE: [],
10174 locking.LEVEL_NODEGROUP: [],
10175 locking.LEVEL_NODE: [],
10178 if self.op.remote_node is None:
10179 # Iallocator will choose any node(s) in the same group
10180 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10182 group_nodes = frozenset([self.op.remote_node])
10184 # Determine nodes to be locked
10185 self.lock_nodes = set([self.op.node_name]) | group_nodes
10187 def _DetermineInstances(self):
10188 """Builds list of instances to operate on.
10191 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10193 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10194 # Primary instances only
10195 inst_fn = _GetNodePrimaryInstances
10196 assert self.op.remote_node is None, \
10197 "Evacuating primary instances requires iallocator"
10198 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10199 # Secondary instances only
10200 inst_fn = _GetNodeSecondaryInstances
10203 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10204 inst_fn = _GetNodeInstances
10206 return inst_fn(self.cfg, self.op.node_name)
10208 def DeclareLocks(self, level):
10209 if level == locking.LEVEL_INSTANCE:
10210 # Lock instances optimistically, needs verification once node and group
10211 # locks have been acquired
10212 self.needed_locks[locking.LEVEL_INSTANCE] = \
10213 set(i.name for i in self._DetermineInstances())
10215 elif level == locking.LEVEL_NODEGROUP:
10216 # Lock node groups optimistically, needs verification once nodes have been acquired
10218 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10219 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10221 elif level == locking.LEVEL_NODE:
10222 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10224 def CheckPrereq(self):
10226 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10227 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10228 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10230 assert owned_nodes == self.lock_nodes
10232 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10233 if owned_groups != wanted_groups:
10234 raise errors.OpExecError("Node groups changed since locks were acquired,"
10235 " current groups are '%s', used to be '%s'" %
10236 (utils.CommaJoin(wanted_groups),
10237 utils.CommaJoin(owned_groups)))
10239 # Determine affected instances
10240 self.instances = self._DetermineInstances()
10241 self.instance_names = [i.name for i in self.instances]
10243 if set(self.instance_names) != owned_instances:
10244 raise errors.OpExecError("Instances on node '%s' changed since locks"
10245 " were acquired, current instances are '%s',"
10246 " used to be '%s'" %
10247 (self.op.node_name,
10248 utils.CommaJoin(self.instance_names),
10249 utils.CommaJoin(owned_instances)))
10251 if self.instance_names:
10252 self.LogInfo("Evacuating instances from node '%s': %s",
10254 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10256 self.LogInfo("No instances to evacuate from node '%s'",
10259 if self.op.remote_node is not None:
10260 for i in self.instances:
10261 if i.primary_node == self.op.remote_node:
10262 raise errors.OpPrereqError("Node %s is the primary node of"
10263 " instance %s, cannot use it as"
10265 (self.op.remote_node, i.name),
10266 errors.ECODE_INVAL)
10268 def Exec(self, feedback_fn):
10269 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10271 if not self.instance_names:
10272 # No instances to evacuate
10275 elif self.op.iallocator is not None:
10276 # TODO: Implement relocation to other group
10277 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10278 evac_mode=self.op.mode,
10279 instances=list(self.instance_names))
10281 ial.Run(self.op.iallocator)
10283 if not ial.success:
10284 raise errors.OpPrereqError("Can't compute node evacuation using"
10285 " iallocator '%s': %s" %
10286 (self.op.iallocator, ial.info),
10287 errors.ECODE_NORES)
10289 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10291 elif self.op.remote_node is not None:
10292 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10294 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10295 remote_node=self.op.remote_node,
10297 mode=constants.REPLACE_DISK_CHG,
10298 early_release=self.op.early_release)]
10299 for instance_name in self.instance_names
10303 raise errors.ProgrammerError("No iallocator or remote node")
10305 return ResultWithJobs(jobs)
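# Sketch of the result in the remote_node branch (hypothetical names):
# evacuating two secondary instances onto "node4" yields one single-opcode job
# per instance, e.g.
#   jobs == [[OpInstanceReplaceDisks(instance_name="inst1", remote_node="node4",
#                                    mode=constants.REPLACE_DISK_CHG, ...)],
#            [OpInstanceReplaceDisks(instance_name="inst2", ...)]]
# which the job processor then submits on the LU's behalf.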
10308 def _SetOpEarlyRelease(early_release, op):
10309 """Sets C{early_release} flag on opcodes if available.
10313 op.early_release = early_release
10314 except AttributeError:
10315 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10320 def _NodeEvacDest(use_nodes, group, nodes):
10321 """Returns group or nodes depending on caller's choice.
10325 return utils.CommaJoin(nodes)
10330 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10331 """Unpacks the result of change-group and node-evacuate iallocator requests.
10333 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10334 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10336 @type lu: L{LogicalUnit}
10337 @param lu: Logical unit instance
10338 @type alloc_result: tuple/list
10339 @param alloc_result: Result from iallocator
10340 @type early_release: bool
10341 @param early_release: Whether to release locks early if possible
10342 @type use_nodes: bool
10343 @param use_nodes: Whether to display node names instead of groups
10346 (moved, failed, jobs) = alloc_result
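# For reference (sketch, simplified): "moved" holds (name, group, nodes)
# tuples, "failed" holds (name, reason) tuples, and "jobs" is a list of job
# definitions, each a list of serialized opcodes, e.g.
#   moved  == [("inst1", "group1", ["node3"])]
#   failed == [("inst2", "instance has no secondary node")]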
10349 lu.LogWarning("Unable to evacuate instances %s",
10350 utils.CommaJoin("%s (%s)" % (name, reason)
10351 for (name, reason) in failed))
10354 lu.LogInfo("Instances to be moved: %s",
10355 utils.CommaJoin("%s (to %s)" %
10356 (name, _NodeEvacDest(use_nodes, group, nodes))
10357 for (name, group, nodes) in moved))
10359 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10360 map(opcodes.OpCode.LoadOpCode, ops)) for ops in jobs]
10364 class LUInstanceGrowDisk(LogicalUnit):
10365 """Grow a disk of an instance.
10368 HPATH = "disk-grow"
10369 HTYPE = constants.HTYPE_INSTANCE
10372 def ExpandNames(self):
10373 self._ExpandAndLockInstance()
10374 self.needed_locks[locking.LEVEL_NODE] = []
10375 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10377 def DeclareLocks(self, level):
10378 if level == locking.LEVEL_NODE:
10379 self._LockInstancesNodes()
10381 def BuildHooksEnv(self):
10382 """Build hooks env.
10384 This runs on the master, the primary and all the secondaries.
10388 "DISK": self.op.disk,
10389 "AMOUNT": self.op.amount,
10391 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10394 def BuildHooksNodes(self):
10395 """Build hooks nodes.
10398 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10401 def CheckPrereq(self):
10402 """Check prerequisites.
10404 This checks that the instance is in the cluster.
10407 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10408 assert instance is not None, \
10409 "Cannot retrieve locked instance %s" % self.op.instance_name
10410 nodenames = list(instance.all_nodes)
10411 for node in nodenames:
10412 _CheckNodeOnline(self, node)
10414 self.instance = instance
10416 if instance.disk_template not in constants.DTS_GROWABLE:
10417 raise errors.OpPrereqError("Instance's disk layout does not support"
10418 " growing", errors.ECODE_INVAL)
10420 self.disk = instance.FindDisk(self.op.disk)
10422 if instance.disk_template not in (constants.DT_FILE,
10423 constants.DT_SHARED_FILE):
10424 # TODO: check the free disk space for file, when that feature will be implemented
10426 _CheckNodesFreeDiskPerVG(self, nodenames,
10427 self.disk.ComputeGrowth(self.op.amount))
10429 def Exec(self, feedback_fn):
10430 """Execute disk grow.
10433 instance = self.instance
10436 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10438 raise errors.OpExecError("Cannot activate block device to grow")
10440 # First run all grow ops in dry-run mode
10441 for node in instance.all_nodes:
10442 self.cfg.SetDiskID(disk, node)
10443 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10444 result.Raise("Grow request failed to node %s" % node)
10446 # We know that (as far as we can test) operations across different
10447 # nodes will succeed, time to run it for real
10448 for node in instance.all_nodes:
10449 self.cfg.SetDiskID(disk, node)
10450 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10451 result.Raise("Grow request failed to node %s" % node)
10453 # TODO: Rewrite code to work properly
10454 # DRBD goes into sync mode for a short amount of time after executing the
10455 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10456 # calling "resize" in sync mode fails. Sleeping for a short amount of
10457 # time is a work-around.
10460 disk.RecordGrow(self.op.amount)
10461 self.cfg.Update(instance, feedback_fn)
10462 if self.op.wait_for_sync:
10463 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10465 self.proc.LogWarning("Disk sync-ing has not returned a good"
10466 " status; please check the instance")
10467 if not instance.admin_up:
10468 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10469 elif not instance.admin_up:
10470 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10471 " not supposed to be running because no wait for"
10472 " sync mode was requested")
10475 class LUInstanceQueryData(NoHooksLU):
10476 """Query runtime instance data.
10481 def ExpandNames(self):
10482 self.needed_locks = {}
10484 # Use locking if requested or when non-static information is wanted
10485 if not (self.op.static or self.op.use_locking):
10486 self.LogWarning("Non-static data requested, locks need to be acquired")
10487 self.op.use_locking = True
10489 if self.op.instances or not self.op.use_locking:
10490 # Expand instance names right here
10491 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10493 # Will use acquired locks
10494 self.wanted_names = None
10496 if self.op.use_locking:
10497 self.share_locks = _ShareAll()
10499 if self.wanted_names is None:
10500 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10502 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10504 self.needed_locks[locking.LEVEL_NODE] = []
10505 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10507 def DeclareLocks(self, level):
10508 if self.op.use_locking and level == locking.LEVEL_NODE:
10509 self._LockInstancesNodes()
10511 def CheckPrereq(self):
10512 """Check prerequisites.
10514 This only checks the optional instance list against the existing names.
10517 if self.wanted_names is None:
10518 assert self.op.use_locking, "Locking was not used"
10519 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10521 self.wanted_instances = \
10522 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10524 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10525 """Returns the status of a block device
10528 if self.op.static or not node:
10531 self.cfg.SetDiskID(dev, node)
10533 result = self.rpc.call_blockdev_find(node, dev)
10537 result.Raise("Can't compute disk status for %s" % instance_name)
10539 status = result.payload
10543 return (status.dev_path, status.major, status.minor,
10544 status.sync_percent, status.estimated_time,
10545 status.is_degraded, status.ldisk_status)
10547 def _ComputeDiskStatus(self, instance, snode, dev):
10548 """Compute block device status.
10551 if dev.dev_type in constants.LDS_DRBD:
10552 # we change the snode then (otherwise we use the one passed in)
10553 if dev.logical_id[0] == instance.primary_node:
10554 snode = dev.logical_id[1]
10556 snode = dev.logical_id[0]
10558 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10559 instance.name, dev)
10560 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10563 dev_children = map(compat.partial(self._ComputeDiskStatus,
10570 "iv_name": dev.iv_name,
10571 "dev_type": dev.dev_type,
10572 "logical_id": dev.logical_id,
10573 "physical_id": dev.physical_id,
10574 "pstatus": dev_pstatus,
10575 "sstatus": dev_sstatus,
10576 "children": dev_children,
10581 def Exec(self, feedback_fn):
10582 """Gather and return data"""
10585 cluster = self.cfg.GetClusterInfo()
10587 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10588 for i in self.wanted_instances)
10589 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10590 if self.op.static or pnode.offline:
10591 remote_state = None
10593 self.LogWarning("Primary node %s is marked offline, returning static"
10594 " information only for instance %s" %
10595 (pnode.name, instance.name))
10597 remote_info = self.rpc.call_instance_info(instance.primary_node,
10599 instance.hypervisor)
10600 remote_info.Raise("Error checking node %s" % instance.primary_node)
10601 remote_info = remote_info.payload
10602 if remote_info and "state" in remote_info:
10603 remote_state = "up"
10605 remote_state = "down"
10607 if instance.admin_up:
10608 config_state = "up"
10610 config_state = "down"
10612 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10615 result[instance.name] = {
10616 "name": instance.name,
10617 "config_state": config_state,
10618 "run_state": remote_state,
10619 "pnode": instance.primary_node,
10620 "snodes": instance.secondary_nodes,
10622 # this happens to be the same format used for hooks
10623 "nics": _NICListToTuple(self, instance.nics),
10624 "disk_template": instance.disk_template,
10626 "hypervisor": instance.hypervisor,
10627 "network_port": instance.network_port,
10628 "hv_instance": instance.hvparams,
10629 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10630 "be_instance": instance.beparams,
10631 "be_actual": cluster.FillBE(instance),
10632 "os_instance": instance.osparams,
10633 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10634 "serial_no": instance.serial_no,
10635 "mtime": instance.mtime,
10636 "ctime": instance.ctime,
10637 "uuid": instance.uuid,
10643 class LUInstanceSetParams(LogicalUnit):
10644 """Modifies an instances's parameters.
10647 HPATH = "instance-modify"
10648 HTYPE = constants.HTYPE_INSTANCE
10651 def CheckArguments(self):
10652 if not (self.op.nics or self.op.disks or self.op.disk_template or
10653 self.op.hvparams or self.op.beparams or self.op.os_name):
10654 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10656 if self.op.hvparams:
10657 _CheckGlobalHvParams(self.op.hvparams)
10661 for disk_op, disk_dict in self.op.disks:
10662 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10663 if disk_op == constants.DDM_REMOVE:
10664 disk_addremove += 1
10666 elif disk_op == constants.DDM_ADD:
10667 disk_addremove += 1
10669 if not isinstance(disk_op, int):
10670 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10671 if not isinstance(disk_dict, dict):
10672 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10673 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10675 if disk_op == constants.DDM_ADD:
10676 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10677 if mode not in constants.DISK_ACCESS_SET:
10678 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10679 errors.ECODE_INVAL)
10680 size = disk_dict.get(constants.IDISK_SIZE, None)
10682 raise errors.OpPrereqError("Required disk parameter size missing",
10683 errors.ECODE_INVAL)
10686 except (TypeError, ValueError), err:
10687 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10688 str(err), errors.ECODE_INVAL)
10689 disk_dict[constants.IDISK_SIZE] = size
10691 # modification of disk
10692 if constants.IDISK_SIZE in disk_dict:
10693 raise errors.OpPrereqError("Disk size change not possible, use"
10694 " grow-disk", errors.ECODE_INVAL)
10696 if disk_addremove > 1:
10697 raise errors.OpPrereqError("Only one disk add or remove operation"
10698 " supported at a time", errors.ECODE_INVAL)
10700 if self.op.disks and self.op.disk_template is not None:
10701 raise errors.OpPrereqError("Disk template conversion and other disk"
10702 " changes not supported at the same time",
10703 errors.ECODE_INVAL)
10705 if (self.op.disk_template and
10706 self.op.disk_template in constants.DTS_INT_MIRROR and
10707 self.op.remote_node is None):
10708 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10709 " one requires specifying a secondary node",
10710 errors.ECODE_INVAL)
10714 for nic_op, nic_dict in self.op.nics:
10715 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10716 if nic_op == constants.DDM_REMOVE:
10719 elif nic_op == constants.DDM_ADD:
10722 if not isinstance(nic_op, int):
10723 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10724 if not isinstance(nic_dict, dict):
10725 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10726 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10728 # nic_dict should be a dict
10729 nic_ip = nic_dict.get(constants.INIC_IP, None)
10730 if nic_ip is not None:
10731 if nic_ip.lower() == constants.VALUE_NONE:
10732 nic_dict[constants.INIC_IP] = None
10734 if not netutils.IPAddress.IsValid(nic_ip):
10735 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10736 errors.ECODE_INVAL)
10738 nic_bridge = nic_dict.get("bridge", None)
10739 nic_link = nic_dict.get(constants.INIC_LINK, None)
10740 if nic_bridge and nic_link:
10741 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10742 " at the same time", errors.ECODE_INVAL)
10743 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10744 nic_dict["bridge"] = None
10745 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10746 nic_dict[constants.INIC_LINK] = None
10748 if nic_op == constants.DDM_ADD:
10749 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10750 if nic_mac is None:
10751 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10753 if constants.INIC_MAC in nic_dict:
10754 nic_mac = nic_dict[constants.INIC_MAC]
10755 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10756 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10758 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10759 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10760 " modifying an existing nic",
10761 errors.ECODE_INVAL)
10763 if nic_addremove > 1:
10764 raise errors.OpPrereqError("Only one NIC add or remove operation"
10765 " supported at a time", errors.ECODE_INVAL)
10767 def ExpandNames(self):
10768 self._ExpandAndLockInstance()
10769 self.needed_locks[locking.LEVEL_NODE] = []
10770 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10772 def DeclareLocks(self, level):
10773 if level == locking.LEVEL_NODE:
10774 self._LockInstancesNodes()
10775 if self.op.disk_template and self.op.remote_node:
10776 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10777 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10779 def BuildHooksEnv(self):
10780 """Build hooks env.
10782 This runs on the master, primary and secondaries.
10786 if constants.BE_MEMORY in self.be_new:
10787 args["memory"] = self.be_new[constants.BE_MEMORY]
10788 if constants.BE_VCPUS in self.be_new:
10789 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10790 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10791 # information at all.
10794 nic_override = dict(self.op.nics)
10795 for idx, nic in enumerate(self.instance.nics):
10796 if idx in nic_override:
10797 this_nic_override = nic_override[idx]
10799 this_nic_override = {}
10800 if constants.INIC_IP in this_nic_override:
10801 ip = this_nic_override[constants.INIC_IP]
10804 if constants.INIC_MAC in this_nic_override:
10805 mac = this_nic_override[constants.INIC_MAC]
10808 if idx in self.nic_pnew:
10809 nicparams = self.nic_pnew[idx]
10811 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10812 mode = nicparams[constants.NIC_MODE]
10813 link = nicparams[constants.NIC_LINK]
10814 args["nics"].append((ip, mac, mode, link))
10815 if constants.DDM_ADD in nic_override:
10816 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10817 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10818 nicparams = self.nic_pnew[constants.DDM_ADD]
10819 mode = nicparams[constants.NIC_MODE]
10820 link = nicparams[constants.NIC_LINK]
10821 args["nics"].append((ip, mac, mode, link))
10822 elif constants.DDM_REMOVE in nic_override:
10823 del args["nics"][-1]
10825 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10826 if self.op.disk_template:
10827 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10831 def BuildHooksNodes(self):
10832 """Build hooks nodes.
10835 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10838 def CheckPrereq(self):
10839 """Check prerequisites.
10841 This only checks the instance list against the existing names.
10844 # checking the new params on the primary/secondary nodes
10846 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10847 cluster = self.cluster = self.cfg.GetClusterInfo()
10848 assert self.instance is not None, \
10849 "Cannot retrieve locked instance %s" % self.op.instance_name
10850 pnode = instance.primary_node
10851 nodelist = list(instance.all_nodes)
10854 if self.op.os_name and not self.op.force:
10855 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10856 self.op.force_variant)
10857 instance_os = self.op.os_name
10859 instance_os = instance.os
10861 if self.op.disk_template:
10862 if instance.disk_template == self.op.disk_template:
10863 raise errors.OpPrereqError("Instance already has disk template %s" %
10864 instance.disk_template, errors.ECODE_INVAL)
10866 if (instance.disk_template,
10867 self.op.disk_template) not in self._DISK_CONVERSIONS:
10868 raise errors.OpPrereqError("Unsupported disk template conversion from"
10869 " %s to %s" % (instance.disk_template,
10870 self.op.disk_template),
10871 errors.ECODE_INVAL)
10872 _CheckInstanceDown(self, instance, "cannot change disk template")
10873 if self.op.disk_template in constants.DTS_INT_MIRROR:
10874 if self.op.remote_node == pnode:
10875 raise errors.OpPrereqError("Given new secondary node %s is the same"
10876 " as the primary node of the instance" %
10877 self.op.remote_node, errors.ECODE_STATE)
10878 _CheckNodeOnline(self, self.op.remote_node)
10879 _CheckNodeNotDrained(self, self.op.remote_node)
10880 # FIXME: here we assume that the old instance type is DT_PLAIN
10881 assert instance.disk_template == constants.DT_PLAIN
10882 disks = [{constants.IDISK_SIZE: d.size,
10883 constants.IDISK_VG: d.logical_id[0]}
10884 for d in instance.disks]
10885 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10886 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10888 # hvparams processing
10889 if self.op.hvparams:
10890 hv_type = instance.hypervisor
10891 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10892 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10893 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10896 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10897 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10898 self.hv_proposed = self.hv_new = hv_new # the new actual values
10899 self.hv_inst = i_hvdict # the new dict (without defaults)
10901 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
10903 self.hv_new = self.hv_inst = {}
10905 # beparams processing
10906 if self.op.beparams:
10907 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10909 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10910 be_new = cluster.SimpleFillBE(i_bedict)
10911 self.be_proposed = self.be_new = be_new # the new actual values
10912 self.be_inst = i_bedict # the new dict (without defaults)
10914 self.be_new = self.be_inst = {}
10915 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
10916 be_old = cluster.FillBE(instance)
10918 # CPU param validation -- checking every time a parameter is
10919 # changed to cover all cases where either CPU mask or vcpus have
10921 if (constants.BE_VCPUS in self.be_proposed and
10922 constants.HV_CPU_MASK in self.hv_proposed):
10924 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
10925 # Verify mask is consistent with number of vCPUs. Can skip this
10926 # test if only 1 entry in the CPU mask, which means same mask
10927 # is applied to all vCPUs.
10928 if (len(cpu_list) > 1 and
10929 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
10930 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the CPU mask [%s]" %
10932 (self.be_proposed[constants.BE_VCPUS],
10933 self.hv_proposed[constants.HV_CPU_MASK]),
10934 errors.ECODE_INVAL)
10936 # Only perform this test if a new CPU mask is given
10937 if constants.HV_CPU_MASK in self.hv_new:
10938 # Calculate the largest CPU number requested
10939 max_requested_cpu = max(map(max, cpu_list))
10940 # Check that all of the instance's nodes have enough physical CPUs to
10941 # satisfy the requested CPU mask
10942 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
10943 max_requested_cpu + 1, instance.hypervisor)
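# Worked example (hypothetical mask): a mask of "0-1:2-3" with BE_VCPUS == 2
# parses into [[0, 1], [2, 3]]; the entry count matches the vCPU count, the
# largest requested CPU is 3, so every node must expose at least 4 physical
# CPUs for the check above to pass.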
10945 # osparams processing
10946 if self.op.osparams:
10947 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10948 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10949 self.os_inst = i_osdict # the new dict (without defaults)
10955 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10956 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10957 mem_check_list = [pnode]
10958 if be_new[constants.BE_AUTO_BALANCE]:
10959 # either we changed auto_balance to yes or it was from before
10960 mem_check_list.extend(instance.secondary_nodes)
10961 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10962 instance.hypervisor)
10963 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10964 instance.hypervisor)
10965 pninfo = nodeinfo[pnode]
10966 msg = pninfo.fail_msg
10968 # Assume the primary node is unreachable and go ahead
10969 self.warn.append("Can't get info from primary node %s: %s" %
10971 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10972 self.warn.append("Node data from primary node %s doesn't contain"
10973 " free memory information" % pnode)
10974 elif instance_info.fail_msg:
10975 self.warn.append("Can't get instance runtime information: %s" %
10976 instance_info.fail_msg)
10978 if instance_info.payload:
10979 current_mem = int(instance_info.payload["memory"])
10981 # Assume instance not running
10982 # (there is a slight race condition here, but it's not very probable,
10983 # and we have no other way to check)
10985 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10986 pninfo.payload["memory_free"])
10988 raise errors.OpPrereqError("This change will prevent the instance"
10989 " from starting, due to %d MB of memory"
10990 " missing on its primary node" % miss_mem,
10991 errors.ECODE_NORES)
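# Worked example (hypothetical numbers): raising BE_MEMORY from 1024 to
# 4096 MB while the instance currently uses 1024 MB and the primary node
# reports 2048 MB free gives miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the
# change is refused.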
10993 if be_new[constants.BE_AUTO_BALANCE]:
10994 for node, nres in nodeinfo.items():
10995 if node not in instance.secondary_nodes:
10997 nres.Raise("Can't get info from secondary node %s" % node,
10998 prereq=True, ecode=errors.ECODE_STATE)
10999 if not isinstance(nres.payload.get("memory_free", None), int):
11000 raise errors.OpPrereqError("Secondary node %s didn't return free"
11001 " memory information" % node,
11002 errors.ECODE_STATE)
11003 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11004 raise errors.OpPrereqError("This change will prevent the instance"
11005 " from failover to its secondary node"
11006 " %s, due to not enough memory" % node,
11007 errors.ECODE_STATE)
11011 self.nic_pinst = {}
11012 for nic_op, nic_dict in self.op.nics:
11013 if nic_op == constants.DDM_REMOVE:
11014 if not instance.nics:
11015 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11016 errors.ECODE_INVAL)
11018 if nic_op != constants.DDM_ADD:
11020 if not instance.nics:
11021 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11022 " no NICs" % nic_op,
11023 errors.ECODE_INVAL)
11024 if nic_op < 0 or nic_op >= len(instance.nics):
11025 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11027 (nic_op, len(instance.nics) - 1),
11028 errors.ECODE_INVAL)
11029 old_nic_params = instance.nics[nic_op].nicparams
11030 old_nic_ip = instance.nics[nic_op].ip
11032 old_nic_params = {}
11035 update_params_dict = dict([(key, nic_dict[key])
11036 for key in constants.NICS_PARAMETERS
11037 if key in nic_dict])
11039 if "bridge" in nic_dict:
11040 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11042 new_nic_params = _GetUpdatedParams(old_nic_params,
11043 update_params_dict)
11044 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11045 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11046 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11047 self.nic_pinst[nic_op] = new_nic_params
11048 self.nic_pnew[nic_op] = new_filled_nic_params
11049 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11051 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11052 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11053 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11054 if msg:
11055 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11056 if self.op.force:
11057 self.warn.append(msg)
11058 else:
11059 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11060 if new_nic_mode == constants.NIC_MODE_ROUTED:
11061 if constants.INIC_IP in nic_dict:
11062 nic_ip = nic_dict[constants.INIC_IP]
11063 else:
11064 nic_ip = old_nic_ip
11065 if nic_ip is None:
11066 raise errors.OpPrereqError("Cannot set the nic ip to None"
11067 " on a routed nic", errors.ECODE_INVAL)
11068 if constants.INIC_MAC in nic_dict:
11069 nic_mac = nic_dict[constants.INIC_MAC]
11070 if nic_mac is None:
11071 raise errors.OpPrereqError("Cannot set the nic mac to None",
11072 errors.ECODE_INVAL)
11073 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11074 # otherwise generate the mac
11075 nic_dict[constants.INIC_MAC] = \
11076 self.cfg.GenerateMAC(self.proc.GetECId())
11077 else:
11078 # or validate/reserve the current one
11079 try:
11080 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11081 except errors.ReservationError:
11082 raise errors.OpPrereqError("MAC address %s already in use"
11083 " in cluster" % nic_mac,
11084 errors.ECODE_NOTUNIQUE)
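# Illustrative shape of self.op.nics consumed by the loop above (values are
# hypothetical, not from the source): [(constants.DDM_ADD, {"mac": "auto",
# "mode": "bridged", "link": "xen-br0"}), (0, {"ip": "198.51.100.10"})].
# DDM_ADD/DDM_REMOVE entries add or drop a NIC, while an integer entry
# modifies the NIC at that index.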
11087 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11088 raise errors.OpPrereqError("Disk operations not supported for"
11089 " diskless instances",
11090 errors.ECODE_INVAL)
11091 for disk_op, _ in self.op.disks:
11092 if disk_op == constants.DDM_REMOVE:
11093 if len(instance.disks) == 1:
11094 raise errors.OpPrereqError("Cannot remove the last disk of"
11095 " an instance", errors.ECODE_INVAL)
11096 _CheckInstanceDown(self, instance, "cannot remove disks")
11098 if (disk_op == constants.DDM_ADD and
11099 len(instance.disks) >= constants.MAX_DISKS):
11100 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11101 " add more" % constants.MAX_DISKS,
11102 errors.ECODE_STATE)
11103 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11104 # an existing disk
11105 if disk_op < 0 or disk_op >= len(instance.disks):
11106 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11107 " are 0 to %d" %
11108 (disk_op, len(instance.disks)),
11109 errors.ECODE_INVAL)
11113 def _ConvertPlainToDrbd(self, feedback_fn):
11114 """Converts an instance from plain to drbd.
11117 feedback_fn("Converting template to drbd")
11118 instance = self.instance
11119 pnode = instance.primary_node
11120 snode = self.op.remote_node
11122 # create a fake disk info for _GenerateDiskTemplate
11123 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11124 constants.IDISK_VG: d.logical_id[0]}
11125 for d in instance.disks]
11126 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11127 instance.name, pnode, [snode],
11128 disk_info, None, None, 0, feedback_fn)
11129 info = _GetInstanceInfoText(instance)
11130 feedback_fn("Creating additional volumes...")
11131 # first, create the missing data and meta devices
11132 for disk in new_disks:
11133 # unfortunately this is... not too nice
11134 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11135 info, True)
11136 for child in disk.children:
11137 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11138 # at this stage, all new LVs have been created, we can rename the
11139 # old ones
11140 feedback_fn("Renaming original volumes...")
11141 rename_list = [(o, n.children[0].logical_id)
11142 for (o, n) in zip(instance.disks, new_disks)]
11143 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11144 result.Raise("Failed to rename original LVs")
11146 feedback_fn("Initializing DRBD devices...")
11147 # all child devices are in place, we can now create the DRBD devices
11148 for disk in new_disks:
11149 for node in [pnode, snode]:
11150 f_create = node == pnode
11151 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11153 # at this point, the instance has been modified
11154 instance.disk_template = constants.DT_DRBD8
11155 instance.disks = new_disks
11156 self.cfg.Update(instance, feedback_fn)
11158 # disks are created, waiting for sync
11159 disk_abort = not _WaitForSync(self, instance,
11160 oneshot=not self.op.wait_for_sync)
11161 if disk_abort:
11162 raise errors.OpExecError("There are some degraded disks for"
11163 " this instance, please cleanup manually")
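# Summary of the plain-to-drbd conversion above: DRBD disk objects are
# generated that reuse the existing LVs as their data children, the missing
# data/meta LVs are created on both nodes, the original LVs are renamed into
# the new naming scheme, the DRBD devices are created on top, and the method
# then waits for the initial sync.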
11165 def _ConvertDrbdToPlain(self, feedback_fn):
11166 """Converts an instance from drbd to plain.
11169 instance = self.instance
11170 assert len(instance.secondary_nodes) == 1
11171 pnode = instance.primary_node
11172 snode = instance.secondary_nodes[0]
11173 feedback_fn("Converting template to plain")
11175 old_disks = instance.disks
11176 new_disks = [d.children[0] for d in old_disks]
11178 # copy over size and mode
11179 for parent, child in zip(old_disks, new_disks):
11180 child.size = parent.size
11181 child.mode = parent.mode
11183 # update instance structure
11184 instance.disks = new_disks
11185 instance.disk_template = constants.DT_PLAIN
11186 self.cfg.Update(instance, feedback_fn)
11188 feedback_fn("Removing volumes on the secondary node...")
11189 for disk in old_disks:
11190 self.cfg.SetDiskID(disk, snode)
11191 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11192 if msg:
11193 self.LogWarning("Could not remove block device %s on node %s,"
11194 " continuing anyway: %s", disk.iv_name, snode, msg)
11196 feedback_fn("Removing unneeded volumes on the primary node...")
11197 for idx, disk in enumerate(old_disks):
11198 meta = disk.children[1]
11199 self.cfg.SetDiskID(meta, pnode)
11200 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11201 if msg:
11202 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11203 " continuing anyway: %s", idx, pnode, msg)
11205 def Exec(self, feedback_fn):
11206 """Modifies an instance.
11208 All parameters take effect only at the next restart of the instance.
11211 # Process here the warnings from CheckPrereq, as we don't have a
11212 # feedback_fn there.
11213 for warn in self.warn:
11214 feedback_fn("WARNING: %s" % warn)
11216 result = []
11217 instance = self.instance
11218 # disk changes
11219 for disk_op, disk_dict in self.op.disks:
11220 if disk_op == constants.DDM_REMOVE:
11221 # remove the last disk
11222 device = instance.disks.pop()
11223 device_idx = len(instance.disks)
11224 for node, disk in device.ComputeNodeTree(instance.primary_node):
11225 self.cfg.SetDiskID(disk, node)
11226 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11227 if msg:
11228 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11229 " continuing anyway", device_idx, node, msg)
11230 result.append(("disk/%d" % device_idx, "remove"))
11231 elif disk_op == constants.DDM_ADD:
11233 if instance.disk_template in (constants.DT_FILE,
11234 constants.DT_SHARED_FILE):
11235 file_driver, file_path = instance.disks[0].logical_id
11236 file_path = os.path.dirname(file_path)
11237 else:
11238 file_driver = file_path = None
11239 disk_idx_base = len(instance.disks)
11240 new_disk = _GenerateDiskTemplate(self,
11241 instance.disk_template,
11242 instance.name, instance.primary_node,
11243 instance.secondary_nodes,
11244 [disk_dict],
11245 file_path,
11246 file_driver,
11247 disk_idx_base, feedback_fn)[0]
11248 instance.disks.append(new_disk)
11249 info = _GetInstanceInfoText(instance)
11251 logging.info("Creating volume %s for instance %s",
11252 new_disk.iv_name, instance.name)
11253 # Note: this needs to be kept in sync with _CreateDisks
11255 for node in instance.all_nodes:
11256 f_create = node == instance.primary_node
11257 try:
11258 _CreateBlockDev(self, node, instance, new_disk,
11259 f_create, info, f_create)
11260 except errors.OpExecError, err:
11261 self.LogWarning("Failed to create volume %s (%s) on"
11262 " node %s: %s",
11263 new_disk.iv_name, new_disk, node, err)
11264 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11265 (new_disk.size, new_disk.mode)))
11266 else:
11267 # change a given disk
11268 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11269 result.append(("disk.mode/%d" % disk_op,
11270 disk_dict[constants.IDISK_MODE]))
11272 if self.op.disk_template:
11273 r_shut = _ShutdownInstanceDisks(self, instance)
11274 if not r_shut:
11275 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11276 " proceed with disk template conversion")
11277 mode = (instance.disk_template, self.op.disk_template)
11278 try:
11279 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11280 finally:
11281 self.cfg.ReleaseDRBDMinors(instance.name)
11283 result.append(("disk_template", self.op.disk_template))
11286 for nic_op, nic_dict in self.op.nics:
11287 if nic_op == constants.DDM_REMOVE:
11288 # remove the last nic
11289 del instance.nics[-1]
11290 result.append(("nic.%d" % len(instance.nics), "remove"))
11291 elif nic_op == constants.DDM_ADD:
11292 # mac and bridge should be set, by now
11293 mac = nic_dict[constants.INIC_MAC]
11294 ip = nic_dict.get(constants.INIC_IP, None)
11295 nicparams = self.nic_pinst[constants.DDM_ADD]
11296 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11297 instance.nics.append(new_nic)
11298 result.append(("nic.%d" % (len(instance.nics) - 1),
11299 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11300 (new_nic.mac, new_nic.ip,
11301 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11302 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11303 )))
11304 else:
11305 for key in (constants.INIC_MAC, constants.INIC_IP):
11306 if key in nic_dict:
11307 setattr(instance.nics[nic_op], key, nic_dict[key])
11308 if nic_op in self.nic_pinst:
11309 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11310 for key, val in nic_dict.iteritems():
11311 result.append(("nic.%s/%d" % (key, nic_op), val))
11314 if self.op.hvparams:
11315 instance.hvparams = self.hv_inst
11316 for key, val in self.op.hvparams.iteritems():
11317 result.append(("hv/%s" % key, val))
11320 if self.op.beparams:
11321 instance.beparams = self.be_inst
11322 for key, val in self.op.beparams.iteritems():
11323 result.append(("be/%s" % key, val))
11326 if self.op.os_name:
11327 instance.os = self.op.os_name
11330 if self.op.osparams:
11331 instance.osparams = self.os_inst
11332 for key, val in self.op.osparams.iteritems():
11333 result.append(("os/%s" % key, val))
11335 self.cfg.Update(instance, feedback_fn)
11337 return result
11339 _DISK_CONVERSIONS = {
11340 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11341 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11342 }
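# The dispatch table above is keyed on (current_template, requested_template);
# Exec builds exactly that tuple and calls the matching converter with
# (self, feedback_fn). Only the two listed conversions are supported by this
# mapping; any other pair is expected to be rejected during the earlier
# prerequisite checks.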
11345 class LUInstanceChangeGroup(LogicalUnit):
11346 HPATH = "instance-change-group"
11347 HTYPE = constants.HTYPE_INSTANCE
11348 REQ_BGL = False
11350 def ExpandNames(self):
11351 self.share_locks = _ShareAll()
11352 self.needed_locks = {
11353 locking.LEVEL_NODEGROUP: [],
11354 locking.LEVEL_NODE: [],
11355 }
11357 self._ExpandAndLockInstance()
11359 if self.op.target_groups:
11360 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11361 self.op.target_groups)
11362 else:
11363 self.req_target_uuids = None
11365 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11367 def DeclareLocks(self, level):
11368 if level == locking.LEVEL_NODEGROUP:
11369 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11371 if self.req_target_uuids:
11372 lock_groups = set(self.req_target_uuids)
11374 # Lock all groups used by instance optimistically; this requires going
11375 # via the node before it's locked, requiring verification later on
11376 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11377 lock_groups.update(instance_groups)
11378 else:
11379 # No target groups, need to lock all of them
11380 lock_groups = locking.ALL_SET
11382 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11384 elif level == locking.LEVEL_NODE:
11385 if self.req_target_uuids:
11386 # Lock all nodes used by instances
11387 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11388 self._LockInstancesNodes()
11390 # Lock all nodes in all potential target groups
11391 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11392 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11393 member_nodes = [node_name
11394 for group in lock_groups
11395 for node_name in self.cfg.GetNodeGroup(group).members]
11396 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11397 else:
11398 # Lock all nodes as all groups are potential targets
11399 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11401 def CheckPrereq(self):
11402 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11403 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11404 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11406 assert (self.req_target_uuids is None or
11407 owned_groups.issuperset(self.req_target_uuids))
11408 assert owned_instances == set([self.op.instance_name])
11410 # Get instance information
11411 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11413 # Check if node groups for locked instance are still correct
11414 assert owned_nodes.issuperset(self.instance.all_nodes), \
11415 ("Instance %s's nodes changed while we kept the lock" %
11416 self.op.instance_name)
11418 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11419 owned_groups)
11421 if self.req_target_uuids:
11422 # User requested specific target groups
11423 self.target_uuids = self.req_target_uuids
11424 else:
11425 # All groups except those used by the instance are potential targets
11426 self.target_uuids = owned_groups - inst_groups
11428 conflicting_groups = self.target_uuids & inst_groups
11429 if conflicting_groups:
11430 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11431 " used by the instance '%s'" %
11432 (utils.CommaJoin(conflicting_groups),
11433 self.op.instance_name),
11434 errors.ECODE_INVAL)
11436 if not self.target_uuids:
11437 raise errors.OpPrereqError("There are no possible target groups",
11438 errors.ECODE_INVAL)
11440 def BuildHooksEnv(self):
11441 """Build hooks env.
11444 assert self.target_uuids
11446 env = {
11447 "TARGET_GROUPS": " ".join(self.target_uuids),
11448 }
11450 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11452 return env
11454 def BuildHooksNodes(self):
11455 """Build hooks nodes.
11458 mn = self.cfg.GetMasterNode()
11459 return ([mn], [mn])
11461 def Exec(self, feedback_fn):
11462 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11464 assert instances == [self.op.instance_name], "Instance not locked"
11466 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11467 instances=instances, target_groups=list(self.target_uuids))
11469 ial.Run(self.op.iallocator)
11471 if not ial.success:
11472 raise errors.OpPrereqError("Can't compute solution for changing group of"
11473 " instance '%s' using iallocator '%s': %s" %
11474 (self.op.instance_name, self.op.iallocator,
11475 ial.info),
11476 errors.ECODE_NORES)
11478 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11480 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11481 " instance '%s'", len(jobs), self.op.instance_name)
11483 return ResultWithJobs(jobs)
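# The change-group flow above mirrors group evacuation further below: the
# iallocator is asked for a CHG_GROUP solution, _LoadNodeEvacResult turns the
# returned per-instance move lists into sets of opcode jobs, and
# ResultWithJobs hands those jobs back for submission to the job queue.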
11486 class LUBackupQuery(NoHooksLU):
11487 """Query the exports list
11492 def ExpandNames(self):
11493 self.needed_locks = {}
11494 self.share_locks[locking.LEVEL_NODE] = 1
11495 if not self.op.nodes:
11496 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11497 else:
11498 self.needed_locks[locking.LEVEL_NODE] = \
11499 _GetWantedNodes(self, self.op.nodes)
11501 def Exec(self, feedback_fn):
11502 """Compute the list of all the exported system images.
11504 @rtype: dict
11505 @return: a dictionary with the structure node->(export-list)
11506 where export-list is a list of the instances exported on
11507 that node.
11509 """
11510 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11511 rpcresult = self.rpc.call_export_list(self.nodes)
11512 result = {}
11513 for node in rpcresult:
11514 if rpcresult[node].fail_msg:
11515 result[node] = False
11516 else:
11517 result[node] = rpcresult[node].payload
11519 return result
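# Example of the returned structure (hypothetical names and values):
# {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
# "node2.example.com": False} -- False marks a node whose export list could
# not be retrieved.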
11522 class LUBackupPrepare(NoHooksLU):
11523 """Prepares an instance for an export and returns useful information.
11528 def ExpandNames(self):
11529 self._ExpandAndLockInstance()
11531 def CheckPrereq(self):
11532 """Check prerequisites.
11535 instance_name = self.op.instance_name
11537 self.instance = self.cfg.GetInstanceInfo(instance_name)
11538 assert self.instance is not None, \
11539 "Cannot retrieve locked instance %s" % self.op.instance_name
11540 _CheckNodeOnline(self, self.instance.primary_node)
11542 self._cds = _GetClusterDomainSecret()
11544 def Exec(self, feedback_fn):
11545 """Prepares an instance for an export.
11548 instance = self.instance
11550 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11551 salt = utils.GenerateSecret(8)
11553 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11554 result = self.rpc.call_x509_cert_create(instance.primary_node,
11555 constants.RIE_CERT_VALIDITY)
11556 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11558 (name, cert_pem) = result.payload
11560 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11561 cert_pem)
11563 return {
11564 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11565 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11566 salt),
11567 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11568 }
11570 return None
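# For remote-mode exports the dictionary above carries everything the
# destination cluster needs to verify the source: a handshake derived from
# the cluster domain secret, the HMAC-signed name of the freshly created
# X509 key, and the signed CA certificate; local-mode exports need no
# preparation data.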
11573 class LUBackupExport(LogicalUnit):
11574 """Export an instance to an image in the cluster.
11577 HPATH = "instance-export"
11578 HTYPE = constants.HTYPE_INSTANCE
11579 REQ_BGL = False
11581 def CheckArguments(self):
11582 """Check the arguments.
11585 self.x509_key_name = self.op.x509_key_name
11586 self.dest_x509_ca_pem = self.op.destination_x509_ca
11588 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11589 if not self.x509_key_name:
11590 raise errors.OpPrereqError("Missing X509 key name for encryption",
11591 errors.ECODE_INVAL)
11593 if not self.dest_x509_ca_pem:
11594 raise errors.OpPrereqError("Missing destination X509 CA",
11595 errors.ECODE_INVAL)
11597 def ExpandNames(self):
11598 self._ExpandAndLockInstance()
11600 # Lock all nodes for local exports
11601 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11602 # FIXME: lock only instance primary and destination node
11604 # Sad but true, for now we have to lock all nodes, as we don't know where
11605 # the previous export might be, and in this LU we search for it and
11606 # remove it from its current node. In the future we could fix this by:
11607 # - making a tasklet to search (share-lock all), then create the
11608 # new one, then one to remove, after
11609 # - removing the removal operation altogether
11610 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11612 def DeclareLocks(self, level):
11613 """Last minute lock declaration."""
11614 # All nodes are locked anyway, so nothing to do here.
11616 def BuildHooksEnv(self):
11617 """Build hooks env.
11619 This will run on the master, primary node and target node.
11621 """
11622 env = {
11623 "EXPORT_MODE": self.op.mode,
11624 "EXPORT_NODE": self.op.target_node,
11625 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11626 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11627 # TODO: Generic function for boolean env variables
11628 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11629 }
11631 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11633 return env
11635 def BuildHooksNodes(self):
11636 """Build hooks nodes.
11639 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11641 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11642 nl.append(self.op.target_node)
11644 return (nl, nl)
11646 def CheckPrereq(self):
11647 """Check prerequisites.
11649 This checks that the instance and node names are valid.
11652 instance_name = self.op.instance_name
11654 self.instance = self.cfg.GetInstanceInfo(instance_name)
11655 assert self.instance is not None, \
11656 "Cannot retrieve locked instance %s" % self.op.instance_name
11657 _CheckNodeOnline(self, self.instance.primary_node)
11659 if (self.op.remove_instance and self.instance.admin_up and
11660 not self.op.shutdown):
11661 raise errors.OpPrereqError("Can not remove instance without shutting it"
11662 " down before")
11664 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11665 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11666 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11667 assert self.dst_node is not None
11669 _CheckNodeOnline(self, self.dst_node.name)
11670 _CheckNodeNotDrained(self, self.dst_node.name)
11673 self.dest_disk_info = None
11674 self.dest_x509_ca = None
11676 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11677 self.dst_node = None
11679 if len(self.op.target_node) != len(self.instance.disks):
11680 raise errors.OpPrereqError(("Received destination information for %s"
11681 " disks, but instance %s has %s disks") %
11682 (len(self.op.target_node), instance_name,
11683 len(self.instance.disks)),
11684 errors.ECODE_INVAL)
11686 cds = _GetClusterDomainSecret()
11688 # Check X509 key name
11689 try:
11690 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11691 except (TypeError, ValueError), err:
11692 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11694 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11695 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11696 errors.ECODE_INVAL)
11698 # Load and verify CA
11699 try:
11700 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11701 except OpenSSL.crypto.Error, err:
11702 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11703 (err, ), errors.ECODE_INVAL)
11705 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11706 if errcode is not None:
11707 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11708 (msg, ), errors.ECODE_INVAL)
11710 self.dest_x509_ca = cert
11712 # Verify target information
11713 disk_info = []
11714 for idx, disk_data in enumerate(self.op.target_node):
11715 try:
11716 (host, port, magic) = \
11717 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11718 except errors.GenericError, err:
11719 raise errors.OpPrereqError("Target info for disk %s: %s" %
11720 (idx, err), errors.ECODE_INVAL)
11722 disk_info.append((host, port, magic))
11724 assert len(disk_info) == len(self.op.target_node)
11725 self.dest_disk_info = disk_info
11727 else:
11728 raise errors.ProgrammerError("Unhandled export mode %r" %
11729 self.op.mode)
11731 # instance disk type verification
11732 # TODO: Implement export support for file-based disks
11733 for disk in self.instance.disks:
11734 if disk.dev_type == constants.LD_FILE:
11735 raise errors.OpPrereqError("Export not supported for instances with"
11736 " file-based disks", errors.ECODE_INVAL)
11738 def _CleanupExports(self, feedback_fn):
11739 """Removes exports of current instance from all other nodes.
11741 If an instance in a cluster with nodes A..D was exported to node C, its
11742 exports will be removed from the nodes A, B and D.
11745 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11747 nodelist = self.cfg.GetNodeList()
11748 nodelist.remove(self.dst_node.name)
11750 # on one-node clusters nodelist will be empty after the removal
11751 # if we proceed the backup would be removed because OpBackupQuery
11752 # substitutes an empty list with the full cluster node list.
11753 iname = self.instance.name
11754 if nodelist:
11755 feedback_fn("Removing old exports for instance %s" % iname)
11756 exportlist = self.rpc.call_export_list(nodelist)
11757 for node in exportlist:
11758 if exportlist[node].fail_msg:
11759 continue
11760 if iname in exportlist[node].payload:
11761 msg = self.rpc.call_export_remove(node, iname).fail_msg
11762 if msg:
11763 self.LogWarning("Could not remove older export for instance %s"
11764 " on node %s: %s", iname, node, msg)
11766 def Exec(self, feedback_fn):
11767 """Export an instance to an image in the cluster.
11770 assert self.op.mode in constants.EXPORT_MODES
11772 instance = self.instance
11773 src_node = instance.primary_node
11775 if self.op.shutdown:
11776 # shutdown the instance, but not the disks
11777 feedback_fn("Shutting down instance %s" % instance.name)
11778 result = self.rpc.call_instance_shutdown(src_node, instance,
11779 self.op.shutdown_timeout)
11780 # TODO: Maybe ignore failures if ignore_remove_failures is set
11781 result.Raise("Could not shutdown instance %s on"
11782 " node %s" % (instance.name, src_node))
11784 # set the disks ID correctly since call_instance_start needs the
11785 # correct drbd minor to create the symlinks
11786 for disk in instance.disks:
11787 self.cfg.SetDiskID(disk, src_node)
11789 activate_disks = (not instance.admin_up)
11791 if activate_disks:
11792 # Activate the instance disks if we're exporting a stopped instance
11793 feedback_fn("Activating disks for %s" % instance.name)
11794 _StartInstanceDisks(self, instance, None)
11796 try:
11797 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11798 instance)
11800 helper.CreateSnapshots()
11801 try:
11802 if (self.op.shutdown and instance.admin_up and
11803 not self.op.remove_instance):
11804 assert not activate_disks
11805 feedback_fn("Starting instance %s" % instance.name)
11806 result = self.rpc.call_instance_start(src_node, instance,
11807 None, None)
11808 msg = result.fail_msg
11809 if msg:
11810 feedback_fn("Failed to start instance: %s" % msg)
11811 _ShutdownInstanceDisks(self, instance)
11812 raise errors.OpExecError("Could not start instance: %s" % msg)
11814 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11815 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11816 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11817 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11818 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11820 (key_name, _, _) = self.x509_key_name
11822 dest_ca_pem = \
11823 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11824 self.dest_x509_ca)
11826 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11827 key_name, dest_ca_pem,
11828 timeouts)
11829 finally:
11830 helper.Cleanup()
11832 # Check for backwards compatibility
11833 assert len(dresults) == len(instance.disks)
11834 assert compat.all(isinstance(i, bool) for i in dresults), \
11835 "Not all results are boolean: %r" % dresults
11837 finally:
11838 if activate_disks:
11839 feedback_fn("Deactivating disks for %s" % instance.name)
11840 _ShutdownInstanceDisks(self, instance)
11842 if not (compat.all(dresults) and fin_resu):
11843 failures = []
11844 if not fin_resu:
11845 failures.append("export finalization")
11846 if not compat.all(dresults):
11847 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11848 if not dsk)
11849 failures.append("disk export: disk(s) %s" % fdsk)
11851 raise errors.OpExecError("Export failed, errors in %s" %
11852 utils.CommaJoin(failures))
11854 # At this point, the export was successful, we can cleanup/finish
11856 # Remove instance if requested
11857 if self.op.remove_instance:
11858 feedback_fn("Removing instance %s" % instance.name)
11859 _RemoveInstance(self, feedback_fn, instance,
11860 self.op.ignore_remove_failures)
11862 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11863 self._CleanupExports(feedback_fn)
11865 return fin_resu, dresults
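# fin_resu reports whether export finalization succeeded as a whole, while
# dresults holds one boolean per instance disk; e.g. (True, [True, False])
# would mean finalization worked but the second disk failed to export
# (illustrative values only).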
11868 class LUBackupRemove(NoHooksLU):
11869 """Remove exports related to the named instance.
11874 def ExpandNames(self):
11875 self.needed_locks = {}
11876 # We need all nodes to be locked in order for RemoveExport to work, but we
11877 # don't need to lock the instance itself, as nothing will happen to it (and
11878 # we can remove exports also for a removed instance)
11879 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11881 def Exec(self, feedback_fn):
11882 """Remove any export.
11885 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11886 # If the instance was not found we'll try with the name that was passed in.
11887 # This will only work if it was an FQDN, though.
11888 fqdn_warn = False
11889 if not instance_name:
11890 fqdn_warn = True
11891 instance_name = self.op.instance_name
11893 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11894 exportlist = self.rpc.call_export_list(locked_nodes)
11895 found = False
11896 for node in exportlist:
11897 msg = exportlist[node].fail_msg
11898 if msg:
11899 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11900 continue
11901 if instance_name in exportlist[node].payload:
11902 found = True
11903 result = self.rpc.call_export_remove(node, instance_name)
11904 msg = result.fail_msg
11905 if msg:
11906 logging.error("Could not remove export for instance %s"
11907 " on node %s: %s", instance_name, node, msg)
11909 if fqdn_warn and not found:
11910 feedback_fn("Export not found. If trying to remove an export belonging"
11911 " to a deleted instance please use its Fully Qualified"
11912 " Domain Name.")
11915 class LUGroupAdd(LogicalUnit):
11916 """Logical unit for creating node groups.
11919 HPATH = "group-add"
11920 HTYPE = constants.HTYPE_GROUP
11921 REQ_BGL = False
11923 def ExpandNames(self):
11924 # We need the new group's UUID here so that we can create and acquire the
11925 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11926 # that it should not check whether the UUID exists in the configuration.
11927 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11928 self.needed_locks = {}
11929 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11931 def CheckPrereq(self):
11932 """Check prerequisites.
11934 This checks that the given group name is not an existing node group
11935 already.
11937 """
11938 try:
11939 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11940 except errors.OpPrereqError:
11941 pass
11942 else:
11943 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11944 " node group (UUID: %s)" %
11945 (self.op.group_name, existing_uuid),
11946 errors.ECODE_EXISTS)
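# The try/except/else above inverts the usual pattern: LookupNodeGroup is
# expected to fail for a genuinely new name, so reaching the else branch
# (i.e. the lookup succeeded) is the error case.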
11948 if self.op.ndparams:
11949 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11951 def BuildHooksEnv(self):
11952 """Build hooks env.
11955 return {
11956 "GROUP_NAME": self.op.group_name,
11957 }
11959 def BuildHooksNodes(self):
11960 """Build hooks nodes.
11963 mn = self.cfg.GetMasterNode()
11964 return ([mn], [mn])
11966 def Exec(self, feedback_fn):
11967 """Add the node group to the cluster.
11970 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11971 uuid=self.group_uuid,
11972 alloc_policy=self.op.alloc_policy,
11973 ndparams=self.op.ndparams)
11975 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11976 del self.remove_locks[locking.LEVEL_NODEGROUP]
11979 class LUGroupAssignNodes(NoHooksLU):
11980 """Logical unit for assigning nodes to groups.
11985 def ExpandNames(self):
11986 # These raise errors.OpPrereqError on their own:
11987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11988 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11990 # We want to lock all the affected nodes and groups. We have readily
11991 # available the list of nodes, and the *destination* group. To gather the
11992 # list of "source" groups, we need to fetch node information later on.
11993 self.needed_locks = {
11994 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11995 locking.LEVEL_NODE: self.op.nodes,
11996 }
11998 def DeclareLocks(self, level):
11999 if level == locking.LEVEL_NODEGROUP:
12000 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12002 # Try to get all affected nodes' groups without having the group or node
12003 # lock yet. Needs verification later in the code flow.
12004 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12006 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12008 def CheckPrereq(self):
12009 """Check prerequisites.
12012 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12013 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12014 frozenset(self.op.nodes))
12016 expected_locks = (set([self.group_uuid]) |
12017 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12018 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12019 if actual_locks != expected_locks:
12020 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12021 " current groups are '%s', used to be '%s'" %
12022 (utils.CommaJoin(expected_locks),
12023 utils.CommaJoin(actual_locks)))
12025 self.node_data = self.cfg.GetAllNodesInfo()
12026 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12027 instance_data = self.cfg.GetAllInstancesInfo()
12029 if self.group is None:
12030 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12031 (self.op.group_name, self.group_uuid))
12033 (new_splits, previous_splits) = \
12034 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12035 for node in self.op.nodes],
12036 self.node_data, instance_data)
12038 if new_splits:
12039 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12041 if not self.op.force:
12042 raise errors.OpExecError("The following instances get split by this"
12043 " change and --force was not given: %s" %
12044 fmt_new_splits)
12045 else:
12046 self.LogWarning("This operation will split the following instances: %s",
12047 fmt_new_splits)
12049 if previous_splits:
12050 self.LogWarning("In addition, these already-split instances continue"
12051 " to be split across groups: %s",
12052 utils.CommaJoin(utils.NiceSort(previous_splits)))
12054 def Exec(self, feedback_fn):
12055 """Assign nodes to a new group.
12058 for node in self.op.nodes:
12059 self.node_data[node].group = self.group_uuid
12061 # FIXME: Depends on side-effects of modifying the result of
12062 # C{cfg.GetAllNodesInfo}
12064 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12066 @staticmethod
12067 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12068 """Check for split instances after a node assignment.
12070 This method considers a series of node assignments as an atomic operation,
12071 and returns information about split instances after applying the set of
12074 In particular, it returns information about newly split instances, and
12075 instances that were already split, and remain so after the change.
12077 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12078 considered.
12080 @type changes: list of (node_name, new_group_uuid) pairs.
12081 @param changes: list of node assignments to consider.
12082 @param node_data: a dict with data for all nodes
12083 @param instance_data: a dict with all instances to consider
12084 @rtype: a two-tuple
12085 @return: a list of instances that were previously okay and become split as
12086 a consequence of this change, and a list of instances that were already
12087 split and that this change does not fix.
12089 """
12090 changed_nodes = dict((node, group) for node, group in changes
12091 if node_data[node].group != group)
12093 all_split_instances = set()
12094 previously_split_instances = set()
12096 def InstanceNodes(instance):
12097 return [instance.primary_node] + list(instance.secondary_nodes)
12099 for inst in instance_data.values():
12100 if inst.disk_template not in constants.DTS_INT_MIRROR:
12101 continue
12103 instance_nodes = InstanceNodes(inst)
12105 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12106 previously_split_instances.add(inst.name)
12108 if len(set(changed_nodes.get(node, node_data[node].group)
12109 for node in instance_nodes)) > 1:
12110 all_split_instances.add(inst.name)
12112 return (list(all_split_instances - previously_split_instances),
12113 list(previously_split_instances & all_split_instances))
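# Illustrative example of the split computation (hypothetical names): for a
# DRBD instance on nodes n1 and n2, both currently in group G1, the change
# list [("n1", "G2")] leaves its nodes spanning {"G2", "G1"}, so the instance
# appears in the first returned list (newly split); an instance that already
# spanned two groups and is not reunited by the change ends up in the second
# list instead.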
12116 class _GroupQuery(_QueryBase):
12117 FIELDS = query.GROUP_FIELDS
12119 def ExpandNames(self, lu):
12120 lu.needed_locks = {}
12122 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12123 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12125 if not self.names:
12126 self.wanted = [name_to_uuid[name]
12127 for name in utils.NiceSort(name_to_uuid.keys())]
12128 else:
12129 # Accept names to be either names or UUIDs.
12130 missing = []
12131 self.wanted = []
12132 all_uuid = frozenset(self._all_groups.keys())
12134 for name in self.names:
12135 if name in all_uuid:
12136 self.wanted.append(name)
12137 elif name in name_to_uuid:
12138 self.wanted.append(name_to_uuid[name])
12139 else:
12140 missing.append(name)
12142 if missing:
12143 raise errors.OpPrereqError("Some groups do not exist: %s" %
12144 utils.CommaJoin(missing),
12145 errors.ECODE_NOENT)
12147 def DeclareLocks(self, lu, level):
12148 pass
12150 def _GetQueryData(self, lu):
12151 """Computes the list of node groups and their attributes.
12154 do_nodes = query.GQ_NODE in self.requested_data
12155 do_instances = query.GQ_INST in self.requested_data
12157 group_to_nodes = None
12158 group_to_instances = None
12160 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12161 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12162 # latter GetAllInstancesInfo() is not enough, for we have to go through
12163 # instance->node. Hence, we will need to process nodes even if we only need
12164 # instance information.
12165 if do_nodes or do_instances:
12166 all_nodes = lu.cfg.GetAllNodesInfo()
12167 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12168 node_to_group = {}
12170 for node in all_nodes.values():
12171 if node.group in group_to_nodes:
12172 group_to_nodes[node.group].append(node.name)
12173 node_to_group[node.name] = node.group
12175 if do_instances:
12176 all_instances = lu.cfg.GetAllInstancesInfo()
12177 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12179 for instance in all_instances.values():
12180 node = instance.primary_node
12181 if node in node_to_group:
12182 group_to_instances[node_to_group[node]].append(instance.name)
12184 if not do_nodes:
12185 # Do not pass on node information if it was not requested.
12186 group_to_nodes = None
12188 return query.GroupQueryData([self._all_groups[uuid]
12189 for uuid in self.wanted],
12190 group_to_nodes, group_to_instances)
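# Example of the maps built above (hypothetical values): group_to_nodes =
# {"group-uuid-1": ["node1", "node2"]} and group_to_instances =
# {"group-uuid-1": ["inst1"]}; either map stays None when the corresponding
# GQ_NODE/GQ_INST data was not requested.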
12193 class LUGroupQuery(NoHooksLU):
12194 """Logical unit for querying node groups.
12199 def CheckArguments(self):
12200 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12201 self.op.output_fields, False)
12203 def ExpandNames(self):
12204 self.gq.ExpandNames(self)
12206 def DeclareLocks(self, level):
12207 self.gq.DeclareLocks(self, level)
12209 def Exec(self, feedback_fn):
12210 return self.gq.OldStyleQuery(self)
12213 class LUGroupSetParams(LogicalUnit):
12214 """Modifies the parameters of a node group.
12217 HPATH = "group-modify"
12218 HTYPE = constants.HTYPE_GROUP
12219 REQ_BGL = False
12221 def CheckArguments(self):
12222 all_changes = [
12223 self.op.ndparams,
12224 self.op.alloc_policy,
12225 ]
12227 if all_changes.count(None) == len(all_changes):
12228 raise errors.OpPrereqError("Please pass at least one modification",
12229 errors.ECODE_INVAL)
12231 def ExpandNames(self):
12232 # This raises errors.OpPrereqError on its own:
12233 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12235 self.needed_locks = {
12236 locking.LEVEL_NODEGROUP: [self.group_uuid],
12237 }
12239 def CheckPrereq(self):
12240 """Check prerequisites.
12243 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12245 if self.group is None:
12246 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12247 (self.op.group_name, self.group_uuid))
12249 if self.op.ndparams:
12250 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12251 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12252 self.new_ndparams = new_ndparams
12254 def BuildHooksEnv(self):
12255 """Build hooks env.
12258 return {
12259 "GROUP_NAME": self.op.group_name,
12260 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12261 }
12263 def BuildHooksNodes(self):
12264 """Build hooks nodes.
12267 mn = self.cfg.GetMasterNode()
12268 return ([mn], [mn])
12270 def Exec(self, feedback_fn):
12271 """Modifies the node group.
12273 """
12274 result = []
12276 if self.op.ndparams:
12277 self.group.ndparams = self.new_ndparams
12278 result.append(("ndparams", str(self.group.ndparams)))
12280 if self.op.alloc_policy:
12281 self.group.alloc_policy = self.op.alloc_policy
12283 self.cfg.Update(self.group, feedback_fn)
12284 return result
12287 class LUGroupRemove(LogicalUnit):
12288 HPATH = "group-remove"
12289 HTYPE = constants.HTYPE_GROUP
12290 REQ_BGL = False
12292 def ExpandNames(self):
12293 # This will raise errors.OpPrereqError on its own:
12294 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12295 self.needed_locks = {
12296 locking.LEVEL_NODEGROUP: [self.group_uuid],
12297 }
12299 def CheckPrereq(self):
12300 """Check prerequisites.
12302 This checks that the given group name exists as a node group, that it is
12303 empty (i.e., contains no nodes), and that it is not the last group of the
12304 cluster.
12306 """
12307 # Verify that the group is empty.
12308 group_nodes = [node.name
12309 for node in self.cfg.GetAllNodesInfo().values()
12310 if node.group == self.group_uuid]
12312 if group_nodes:
12313 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12314 " nodes: %s" %
12315 (self.op.group_name,
12316 utils.CommaJoin(utils.NiceSort(group_nodes))),
12317 errors.ECODE_STATE)
12319 # Verify the cluster would not be left group-less.
12320 if len(self.cfg.GetNodeGroupList()) == 1:
12321 raise errors.OpPrereqError("Group '%s' is the only group,"
12322 " cannot be removed" %
12323 self.op.group_name,
12324 errors.ECODE_STATE)
12326 def BuildHooksEnv(self):
12327 """Build hooks env.
12330 return {
12331 "GROUP_NAME": self.op.group_name,
12332 }
12334 def BuildHooksNodes(self):
12335 """Build hooks nodes.
12338 mn = self.cfg.GetMasterNode()
12339 return ([mn], [mn])
12341 def Exec(self, feedback_fn):
12342 """Remove the node group.
12344 """
12345 try:
12346 self.cfg.RemoveNodeGroup(self.group_uuid)
12347 except errors.ConfigurationError:
12348 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12349 (self.op.group_name, self.group_uuid))
12351 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12354 class LUGroupRename(LogicalUnit):
12355 HPATH = "group-rename"
12356 HTYPE = constants.HTYPE_GROUP
12357 REQ_BGL = False
12359 def ExpandNames(self):
12360 # This raises errors.OpPrereqError on its own:
12361 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12363 self.needed_locks = {
12364 locking.LEVEL_NODEGROUP: [self.group_uuid],
12365 }
12367 def CheckPrereq(self):
12368 """Check prerequisites.
12370 Ensures requested new name is not yet used.
12372 """
12373 try:
12374 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12375 except errors.OpPrereqError:
12376 pass
12377 else:
12378 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12379 " node group (UUID: %s)" %
12380 (self.op.new_name, new_name_uuid),
12381 errors.ECODE_EXISTS)
12383 def BuildHooksEnv(self):
12384 """Build hooks env.
12387 return {
12388 "OLD_NAME": self.op.group_name,
12389 "NEW_NAME": self.op.new_name,
12390 }
12392 def BuildHooksNodes(self):
12393 """Build hooks nodes.
12396 mn = self.cfg.GetMasterNode()
12398 all_nodes = self.cfg.GetAllNodesInfo()
12399 all_nodes.pop(mn, None)
12401 run_nodes = [mn]
12402 run_nodes.extend(node.name for node in all_nodes.values()
12403 if node.group == self.group_uuid)
12405 return (run_nodes, run_nodes)
12407 def Exec(self, feedback_fn):
12408 """Rename the node group.
12411 group = self.cfg.GetNodeGroup(self.group_uuid)
12413 if group is None:
12414 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12415 (self.op.group_name, self.group_uuid))
12417 group.name = self.op.new_name
12418 self.cfg.Update(group, feedback_fn)
12420 return self.op.new_name
12423 class LUGroupEvacuate(LogicalUnit):
12424 HPATH = "group-evacuate"
12425 HTYPE = constants.HTYPE_GROUP
12426 REQ_BGL = False
12428 def ExpandNames(self):
12429 # This raises errors.OpPrereqError on its own:
12430 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12432 if self.op.target_groups:
12433 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12434 self.op.target_groups)
12435 else:
12436 self.req_target_uuids = []
12438 if self.group_uuid in self.req_target_uuids:
12439 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12440 " as a target group (targets are %s)" %
12441 (self.group_uuid,
12442 utils.CommaJoin(self.req_target_uuids)),
12443 errors.ECODE_INVAL)
12445 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12447 self.share_locks = _ShareAll()
12448 self.needed_locks = {
12449 locking.LEVEL_INSTANCE: [],
12450 locking.LEVEL_NODEGROUP: [],
12451 locking.LEVEL_NODE: [],
12452 }
12454 def DeclareLocks(self, level):
12455 if level == locking.LEVEL_INSTANCE:
12456 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12458 # Lock instances optimistically, needs verification once node and group
12459 # locks have been acquired
12460 self.needed_locks[locking.LEVEL_INSTANCE] = \
12461 self.cfg.GetNodeGroupInstances(self.group_uuid)
12463 elif level == locking.LEVEL_NODEGROUP:
12464 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12466 if self.req_target_uuids:
12467 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12469 # Lock all groups used by instances optimistically; this requires going
12470 # via the node before it's locked, requiring verification later on
12471 lock_groups.update(group_uuid
12472 for instance_name in
12473 self.owned_locks(locking.LEVEL_INSTANCE)
12474 for group_uuid in
12475 self.cfg.GetInstanceNodeGroups(instance_name))
12476 else:
12477 # No target groups, need to lock all of them
12478 lock_groups = locking.ALL_SET
12480 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12482 elif level == locking.LEVEL_NODE:
12483 # This will only lock the nodes in the group to be evacuated which
12484 # contain actual instances
12485 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12486 self._LockInstancesNodes()
12488 # Lock all nodes in group to be evacuated and target groups
12489 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12490 assert self.group_uuid in owned_groups
12491 member_nodes = [node_name
12492 for group in owned_groups
12493 for node_name in self.cfg.GetNodeGroup(group).members]
12494 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12496 def CheckPrereq(self):
12497 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12498 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12499 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12501 assert owned_groups.issuperset(self.req_target_uuids)
12502 assert self.group_uuid in owned_groups
12504 # Check if locked instances are still correct
12505 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12507 # Get instance information
12508 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12510 # Check if node groups for locked instances are still correct
12511 for instance_name in owned_instances:
12512 inst = self.instances[instance_name]
12513 assert owned_nodes.issuperset(inst.all_nodes), \
12514 "Instance %s's nodes changed while we kept the lock" % instance_name
12516 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12517 owned_groups)
12519 assert self.group_uuid in inst_groups, \
12520 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12522 if self.req_target_uuids:
12523 # User requested specific target groups
12524 self.target_uuids = self.req_target_uuids
12525 else:
12526 # All groups except the one to be evacuated are potential targets
12527 self.target_uuids = [group_uuid for group_uuid in owned_groups
12528 if group_uuid != self.group_uuid]
12530 if not self.target_uuids:
12531 raise errors.OpPrereqError("There are no possible target groups",
12532 errors.ECODE_INVAL)
12534 def BuildHooksEnv(self):
12535 """Build hooks env.
12538 return {
12539 "GROUP_NAME": self.op.group_name,
12540 "TARGET_GROUPS": " ".join(self.target_uuids),
12541 }
12543 def BuildHooksNodes(self):
12544 """Build hooks nodes.
12547 mn = self.cfg.GetMasterNode()
12549 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12551 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12553 return (run_nodes, run_nodes)
12555 def Exec(self, feedback_fn):
12556 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12558 assert self.group_uuid not in self.target_uuids
12560 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12561 instances=instances, target_groups=self.target_uuids)
12563 ial.Run(self.op.iallocator)
12565 if not ial.success:
12566 raise errors.OpPrereqError("Can't compute group evacuation using"
12567 " iallocator '%s': %s" %
12568 (self.op.iallocator, ial.info),
12569 errors.ECODE_NORES)
12571 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12573 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12574 len(jobs), self.op.group_name)
12576 return ResultWithJobs(jobs)
12579 class TagsLU(NoHooksLU): # pylint: disable=W0223
12580 """Generic tags LU.
12582 This is an abstract class which is the parent of all the other tags LUs.
12585 def ExpandNames(self):
12586 self.group_uuid = None
12587 self.needed_locks = {}
12588 if self.op.kind == constants.TAG_NODE:
12589 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12590 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12591 elif self.op.kind == constants.TAG_INSTANCE:
12592 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12593 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12594 elif self.op.kind == constants.TAG_NODEGROUP:
12595 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12597 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12598 # not possible to acquire the BGL based on opcode parameters)
12600 def CheckPrereq(self):
12601 """Check prerequisites.
12604 if self.op.kind == constants.TAG_CLUSTER:
12605 self.target = self.cfg.GetClusterInfo()
12606 elif self.op.kind == constants.TAG_NODE:
12607 self.target = self.cfg.GetNodeInfo(self.op.name)
12608 elif self.op.kind == constants.TAG_INSTANCE:
12609 self.target = self.cfg.GetInstanceInfo(self.op.name)
12610 elif self.op.kind == constants.TAG_NODEGROUP:
12611 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12612 else:
12613 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12614 str(self.op.kind), errors.ECODE_INVAL)
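# After CheckPrereq, self.target is the taggable configuration object that
# matches op.kind: the cluster, a node, an instance or a node group. For
# example (hypothetical name), kind=constants.TAG_NODE with
# name="node1.example.com" resolves to that node's configuration object.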
12617 class LUTagsGet(TagsLU):
12618 """Returns the tags of a given object.
12623 def ExpandNames(self):
12624 TagsLU.ExpandNames(self)
12626 # Share locks as this is only a read operation
12627 self.share_locks = _ShareAll()
12629 def Exec(self, feedback_fn):
12630 """Returns the tag list.
12633 return list(self.target.GetTags())
12636 class LUTagsSearch(NoHooksLU):
12637 """Searches the tags for a given pattern.
12642 def ExpandNames(self):
12643 self.needed_locks = {}
12645 def CheckPrereq(self):
12646 """Check prerequisites.
12648 This checks the pattern passed for validity by compiling it.
12651 try:
12652 self.re = re.compile(self.op.pattern)
12653 except re.error, err:
12654 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12655 (self.op.pattern, err), errors.ECODE_INVAL)
12657 def Exec(self, feedback_fn):
12658 """Returns the tag list.
12661 cfg = self.cfg
12662 tgts = [("/cluster", cfg.GetClusterInfo())]
12663 ilist = cfg.GetAllInstancesInfo().values()
12664 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12665 nlist = cfg.GetAllNodesInfo().values()
12666 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12667 tgts.extend(("/nodegroup/%s" % n.name, n)
12668 for n in cfg.GetAllNodeGroupsInfo().values())
12669 results = []
12670 for path, target in tgts:
12671 for tag in target.GetTags():
12672 if self.re.search(tag):
12673 results.append((path, tag))
12675 return results
12677 class LUTagsSet(TagsLU):
12678 """Sets a tag on a given object.
12683 def CheckPrereq(self):
12684 """Check prerequisites.
12686 This checks the type and length of the tag name and value.
12689 TagsLU.CheckPrereq(self)
12690 for tag in self.op.tags:
12691 objects.TaggableObject.ValidateTag(tag)
12693 def Exec(self, feedback_fn):
12694 """Sets the tag.
12696 """
12697 try:
12698 for tag in self.op.tags:
12699 self.target.AddTag(tag)
12700 except errors.TagError, err:
12701 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12702 self.cfg.Update(self.target, feedback_fn)
12705 class LUTagsDel(TagsLU):
12706 """Delete a list of tags from a given object.
12711 def CheckPrereq(self):
12712 """Check prerequisites.
12714 This checks that we have the given tag.
12717 TagsLU.CheckPrereq(self)
12718 for tag in self.op.tags:
12719 objects.TaggableObject.ValidateTag(tag)
12720 del_tags = frozenset(self.op.tags)
12721 cur_tags = self.target.GetTags()
12723 diff_tags = del_tags - cur_tags
12724 if diff_tags:
12725 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12726 raise errors.OpPrereqError("Tag(s) %s not found" %
12727 (utils.CommaJoin(diff_names), ),
12728 errors.ECODE_NOENT)
12730 def Exec(self, feedback_fn):
12731 """Remove the tag from the object.
12734 for tag in self.op.tags:
12735 self.target.RemoveTag(tag)
12736 self.cfg.Update(self.target, feedback_fn)
12739 class LUTestDelay(NoHooksLU):
12740 """Sleep for a specified amount of time.
12742 This LU sleeps on the master and/or nodes for a specified amount of
12743 time.
12745 """
12746 REQ_BGL = False
12748 def ExpandNames(self):
12749 """Expand names and set required locks.
12751 This expands the node list, if any.
12754 self.needed_locks = {}
12755 if self.op.on_nodes:
12756 # _GetWantedNodes can be used here, but is not always appropriate to use
12757 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12758 # more information.
12759 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12760 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12762 def _TestDelay(self):
12763 """Do the actual sleep.
12766 if self.op.on_master:
12767 if not utils.TestDelay(self.op.duration):
12768 raise errors.OpExecError("Error during master delay test")
12769 if self.op.on_nodes:
12770 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12771 for node, node_result in result.items():
12772 node_result.Raise("Failure during rpc call to node %s" % node)
12774 def Exec(self, feedback_fn):
12775 """Execute the test delay opcode, with the wanted repetitions.
12778 if self.op.repeat == 0:
12779 self._TestDelay()
12780 else:
12781 top_value = self.op.repeat - 1
12782 for i in range(self.op.repeat):
12783 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12784 self._TestDelay()
12787 class LUTestJqueue(NoHooksLU):
12788 """Utility LU to test some aspects of the job queue.
12790 """
12791 REQ_BGL = False
12793 # Must be lower than default timeout for WaitForJobChange to see whether it
12794 # notices changed jobs
12795 _CLIENT_CONNECT_TIMEOUT = 20.0
12796 _CLIENT_CONFIRM_TIMEOUT = 60.0
12798 @classmethod
12799 def _NotifyUsingSocket(cls, cb, errcls):
12800 """Opens a Unix socket and waits for another program to connect.
12803 @param cb: Callback to send socket name to client
12804 @type errcls: class
12805 @param errcls: Exception class to use for errors
12808 # Using a temporary directory as there's no easy way to create temporary
12809 # sockets without writing a custom loop around tempfile.mktemp and
12810 # socket.bind
12811 tmpdir = tempfile.mkdtemp()
12812 try:
12813 tmpsock = utils.PathJoin(tmpdir, "sock")
12815 logging.debug("Creating temporary socket at %s", tmpsock)
12816 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12817 try:
12818 sock.bind(tmpsock)
12819 sock.listen(1)
12821 # Send details to client
12822 cb(tmpsock)
12824 # Wait for client to connect before continuing
12825 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12826 try:
12827 (conn, _) = sock.accept()
12828 except socket.error, err:
12829 raise errcls("Client didn't connect in time (%s)" % err)
12830 finally:
12831 sock.close()
12832 finally:
12833 # Remove as soon as client is connected
12834 shutil.rmtree(tmpdir)
12836 # Wait for client to close
12837 try:
12838 try:
12839 # pylint: disable=E1101
12840 # Instance of '_socketobject' has no ... member
12841 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12842 conn.recv(1)
12843 except socket.error, err:
12844 raise errcls("Client failed to confirm notification (%s)" % err)
12845 finally:
12846 conn.close()
12848 def _SendNotification(self, test, arg, sockname):
12849 """Sends a notification to the client.
12852 @param test: Test name
12853 @param arg: Test argument (depends on test)
12854 @type sockname: string
12855 @param sockname: Socket path
12858 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12860 def _Notify(self, prereq, test, arg):
12861 """Notifies the client of a test.
12863 @type prereq: bool
12864 @param prereq: Whether this is a prereq-phase test
12865 @type test: string
12866 @param test: Test name
12867 @param arg: Test argument (depends on test)
12869 """
12870 if prereq:
12871 errcls = errors.OpPrereqError
12872 else:
12873 errcls = errors.OpExecError
12875 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12876 test, arg),
12877 errcls)
12879 def CheckArguments(self):
12880 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12881 self.expandnames_calls = 0
12883 def ExpandNames(self):
12884 checkargs_calls = getattr(self, "checkargs_calls", 0)
12885 if checkargs_calls < 1:
12886 raise errors.ProgrammerError("CheckArguments was not called")
12888 self.expandnames_calls += 1
12890 if self.op.notify_waitlock:
12891 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12893 self.LogInfo("Expanding names")
12895 # Get lock on master node (just to get a lock, not for a particular reason)
12896 self.needed_locks = {
12897 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12898 }
12900 def Exec(self, feedback_fn):
12901 if self.expandnames_calls < 1:
12902 raise errors.ProgrammerError("ExpandNames was not called")
12904 if self.op.notify_exec:
12905 self._Notify(False, constants.JQT_EXEC, None)
12907 self.LogInfo("Executing")
12909 if self.op.log_messages:
12910 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12911 for idx, msg in enumerate(self.op.log_messages):
12912 self.LogInfo("Sending log message %s", idx + 1)
12913 feedback_fn(constants.JQT_MSGPREFIX + msg)
12914 # Report how many test messages have been sent
12915 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12917 if self.op.fail:
12918 raise errors.OpExecError("Opcode failure was requested")
12920 return True
12923 class IAllocator(object):
12924 """IAllocator framework.
12926 An IAllocator instance has four sets of attributes:
12927 - cfg that is needed to query the cluster
12928 - input data (all members of the _KEYS class attribute are required)
12929 - four buffer attributes (in|out_data|text), that represent the
12930 input (to the external script) in text and data structure format,
12931 and the output from it, again in two formats
12932 - the result variables from the script (success, info, nodes) for
12933 easy usage
12935 """
12936 # pylint: disable=R0902
12937 # lots of instance attributes
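# Rough usage sketch (mirrors LUTestAllocator.Exec further below; instance and
# node names are invented and "hail" is only an example allocator name):
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="instance1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpExecError("Relocation failed: %s" % ial.info)
#   new_nodes = ial.result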
12939 def __init__(self, cfg, rpc, mode, **kwargs):
12942 # init buffer variables
12943 self.in_text = self.out_text = self.in_data = self.out_data = None
12944 # init all input fields so that pylint is happy
12946 self.memory = self.disks = self.disk_template = None
12947 self.os = self.tags = self.nics = self.vcpus = None
12948 self.hypervisor = None
12949 self.relocate_from = None
12951 self.instances = None
12952 self.evac_mode = None
12953 self.target_groups = []
12955 self.required_nodes = None
12956 # init result fields
12957 self.success = self.info = self.result = None
12960 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12962 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12963 " IAllocator" % self.mode)
12965 keyset = [n for (n, _) in keydata]
12968 if key not in keyset:
12969 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12970 " IAllocator" % key)
12971 setattr(self, key, kwargs[key])
12974 if key not in kwargs:
12975 raise errors.ProgrammerError("Missing input parameter '%s' to"
12976 " IAllocator" % key)
12977 self._BuildInputData(compat.partial(fn, self), keydata)
12979 def _ComputeClusterData(self):
12980 """Compute the generic allocator input data.
12982 This is the data that is independent of the actual operation.
12986 cluster_info = cfg.GetClusterInfo()
12989 "version": constants.IALLOCATOR_VERSION,
12990 "cluster_name": cfg.GetClusterName(),
12991 "cluster_tags": list(cluster_info.GetTags()),
12992 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12993 # we don't have job IDs
12995 ninfo = cfg.GetAllNodesInfo()
12996 iinfo = cfg.GetAllInstancesInfo().values()
12997 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13000 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13002 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13003 hypervisor_name = self.hypervisor
13004 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13005 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13007 hypervisor_name = cluster_info.enabled_hypervisors[0]
13009 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13012 self.rpc.call_all_instances_info(node_list,
13013 cluster_info.enabled_hypervisors)
13015 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13017 config_ndata = self._ComputeBasicNodeData(ninfo)
13018 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13019 i_list, config_ndata)
13020 assert len(data["nodes"]) == len(ninfo), \
13021 "Incomplete node data computed"
13023 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13025 self.in_data = data
13028 def _ComputeNodeGroupData(cfg):
13029 """Compute node groups data.
13032 ng = dict((guuid, {
13033 "name": gdata.name,
13034 "alloc_policy": gdata.alloc_policy,
13036 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
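# Example of the resulting structure (illustrative values only):
#
#   {"uuid-of-group": {"name": "default", "alloc_policy": "preferred"}}
#
# i.e. a dict indexed by group UUID, which ends up under
# self.in_data["nodegroups"].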
13041 def _ComputeBasicNodeData(node_cfg):
13042 """Compute global node data.
13045 @returns: a dict mapping each node name to a dict of its static (configuration-derived) attributes
13048 # fill in static (config-based) values
13049 node_results = dict((ninfo.name, {
13050 "tags": list(ninfo.GetTags()),
13051 "primary_ip": ninfo.primary_ip,
13052 "secondary_ip": ninfo.secondary_ip,
13053 "offline": ninfo.offline,
13054 "drained": ninfo.drained,
13055 "master_candidate": ninfo.master_candidate,
13056 "group": ninfo.group,
13057 "master_capable": ninfo.master_capable,
13058 "vm_capable": ninfo.vm_capable,
13060 for ninfo in node_cfg.values())
13062 return node_results
13065 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13067 """Compute global node data.
13069 @param node_results: the basic node structures as filled from the config
13072 # make a copy of the current dict
13073 node_results = dict(node_results)
13074 for nname, nresult in node_data.items():
13075 assert nname in node_results, "Missing basic data for node %s" % nname
13076 ninfo = node_cfg[nname]
13078 if not (ninfo.offline or ninfo.drained):
13079 nresult.Raise("Can't get data for node %s" % nname)
13080 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13082 remote_info = nresult.payload
13084 for attr in ["memory_total", "memory_free", "memory_dom0",
13085 "vg_size", "vg_free", "cpu_total"]:
13086 if attr not in remote_info:
13087 raise errors.OpExecError("Node '%s' didn't return attribute"
13088 " '%s'" % (nname, attr))
13089 if not isinstance(remote_info[attr], int):
13090 raise errors.OpExecError("Node '%s' returned invalid value"
13092 (nname, attr, remote_info[attr]))
13093 # compute memory used by primary instances
13094 i_p_mem = i_p_up_mem = 0
13095 for iinfo, beinfo in i_list:
13096 if iinfo.primary_node == nname:
13097 i_p_mem += beinfo[constants.BE_MEMORY]
13098 if iinfo.name not in node_iinfo[nname].payload:
13101 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13102 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13103 remote_info["memory_free"] -= max(0, i_mem_diff)
13106 i_p_up_mem += beinfo[constants.BE_MEMORY]
13108 # compute memory used by instances
13110 "total_memory": remote_info["memory_total"],
13111 "reserved_memory": remote_info["memory_dom0"],
13112 "free_memory": remote_info["memory_free"],
13113 "total_disk": remote_info["vg_size"],
13114 "free_disk": remote_info["vg_free"],
13115 "total_cpus": remote_info["cpu_total"],
13116 "i_pri_memory": i_p_mem,
13117 "i_pri_up_memory": i_p_up_mem,
13119 pnr_dyn.update(node_results[nname])
13120 node_results[nname] = pnr_dyn
13122 return node_results
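# Worked example of the memory accounting above (numbers invented): a primary
# instance with BE_MEMORY=512 that the hypervisor reports as using only
# 300 MiB lowers the node's "free_memory" by max(0, 512 - 300) = 212, so that
# memory promised to an instance but not currently used still counts as
# committed.  "i_pri_memory" sums BE_MEMORY over all primary instances, while
# "i_pri_up_memory" only counts the ones that are administratively up.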
13125 def _ComputeInstanceData(cluster_info, i_list):
13126 """Compute global instance data.
13130 for iinfo, beinfo in i_list:
13132 for nic in iinfo.nics:
13133 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13137 "mode": filled_params[constants.NIC_MODE],
13138 "link": filled_params[constants.NIC_LINK],
13140 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13141 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13142 nic_data.append(nic_dict)
13144 "tags": list(iinfo.GetTags()),
13145 "admin_up": iinfo.admin_up,
13146 "vcpus": beinfo[constants.BE_VCPUS],
13147 "memory": beinfo[constants.BE_MEMORY],
13149 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13151 "disks": [{constants.IDISK_SIZE: dsk.size,
13152 constants.IDISK_MODE: dsk.mode}
13153 for dsk in iinfo.disks],
13154 "disk_template": iinfo.disk_template,
13155 "hypervisor": iinfo.hypervisor,
13157 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13159 instance_data[iinfo.name] = pir
13161 return instance_data
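# Abridged example of one entry in the returned dict (values invented; the NIC
# entries are the nic_data built above):
#
#   {"tags": [], "admin_up": True, "vcpus": 2, "memory": 512,
#    "nics": [{"mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
#    "nodes": ["node1.example.com", "node2.example.com"],
#    "disks": [{"size": 10240, "mode": "rw"}],
#    "disk_template": "drbd", "hypervisor": "xen-pvm",
#    "disk_space_total": 20736}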
13163 def _AddNewInstance(self):
13164 """Add new instance data to allocator structure.
13166 This, in combination with _ComputeClusterData, will create the
13167 correct structure needed as input for the allocator.
13169 The checks for the completeness of the opcode must have already been done.
13173 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13175 if self.disk_template in constants.DTS_INT_MIRROR:
13176 self.required_nodes = 2
13178 self.required_nodes = 1
13182 "disk_template": self.disk_template,
13185 "vcpus": self.vcpus,
13186 "memory": self.memory,
13187 "disks": self.disks,
13188 "disk_space_total": disk_space,
13190 "required_nodes": self.required_nodes,
13191 "hypervisor": self.hypervisor,
13196 def _AddRelocateInstance(self):
13197 """Add relocate instance data to allocator structure.
13199 This, in combination with _ComputeClusterData, will create the
13200 correct structure needed as input for the allocator.
13202 The checks for the completeness of the opcode must have already been done.
13206 instance = self.cfg.GetInstanceInfo(self.name)
13207 if instance is None:
13208 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13209 " IAllocator" % self.name)
13211 if instance.disk_template not in constants.DTS_MIRRORED:
13212 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13213 errors.ECODE_INVAL)
13215 if instance.disk_template in constants.DTS_INT_MIRROR and \
13216 len(instance.secondary_nodes) != 1:
13217 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
13218 errors.ECODE_STATE)
13220 self.required_nodes = 1
13221 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13222 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13226 "disk_space_total": disk_space,
13227 "required_nodes": self.required_nodes,
13228 "relocate_from": self.relocate_from,
13232 def _AddNodeEvacuate(self):
13233 """Get data for node-evacuate requests.
13237 "instances": self.instances,
13238 "evac_mode": self.evac_mode,
13241 def _AddChangeGroup(self):
13242 """Get data for change-group requests.
13246 "instances": self.instances,
13247 "target_groups": self.target_groups,
13250 def _BuildInputData(self, fn, keydata):
13251 """Build input data structures.
13254 self._ComputeClusterData()
13257 request["type"] = self.mode
13258 for keyname, keytype in keydata:
13259 if keyname not in request:
13260 raise errors.ProgrammerError("Request parameter %s is missing" %
13262 val = request[keyname]
13263 if not keytype(val):
13264 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13265 " validation, value %s, expected"
13266 " type %s" % (keyname, val, keytype))
13267 self.in_data["request"] = request
13269 self.in_text = serializer.Dump(self.in_data)
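# The serialized request handed to the iallocator script therefore looks
# roughly like this (heavily abridged, values invented):
#
#   {"version": 2,
#    "cluster_name": "cluster.example.com",
#    "cluster_tags": [],
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com",
#                "memory": 512, "disks": [...], ...}}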
13271 _STRING_LIST = ht.TListOf(ht.TString)
13272 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13273 # pylint: disable=E1101
13274 # Class '...' has no 'OP_ID' member
13275 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13276 opcodes.OpInstanceMigrate.OP_ID,
13277 opcodes.OpInstanceReplaceDisks.OP_ID])
13281 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13282 ht.TItems([ht.TNonEmptyString,
13283 ht.TNonEmptyString,
13284 ht.TListOf(ht.TNonEmptyString),
13287 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13288 ht.TItems([ht.TNonEmptyString,
13291 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13292 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13295 constants.IALLOCATOR_MODE_ALLOC:
13298 ("name", ht.TString),
13299 ("memory", ht.TInt),
13300 ("disks", ht.TListOf(ht.TDict)),
13301 ("disk_template", ht.TString),
13302 ("os", ht.TString),
13303 ("tags", _STRING_LIST),
13304 ("nics", ht.TListOf(ht.TDict)),
13305 ("vcpus", ht.TInt),
13306 ("hypervisor", ht.TString),
13308 constants.IALLOCATOR_MODE_RELOC:
13309 (_AddRelocateInstance,
13310 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13312 constants.IALLOCATOR_MODE_NODE_EVAC:
13313 (_AddNodeEvacuate, [
13314 ("instances", _STRING_LIST),
13315 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13317 constants.IALLOCATOR_MODE_CHG_GROUP:
13318 (_AddChangeGroup, [
13319 ("instances", _STRING_LIST),
13320 ("target_groups", _STRING_LIST),
13324 def Run(self, name, validate=True, call_fn=None):
13325 """Run an instance allocator and return the results.
13328 if call_fn is None:
13329 call_fn = self.rpc.call_iallocator_runner
13331 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13332 result.Raise("Failure while running the iallocator script")
13334 self.out_text = result.payload
13336 self._ValidateResult()
13338 def _ValidateResult(self):
13339 """Process the allocator results.
13341 This will process and, if successful, save the result in
13342 self.out_data and the other result attributes.
13346 rdict = serializer.Load(self.out_text)
13347 except Exception, err:
13348 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13350 if not isinstance(rdict, dict):
13351 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13353 # TODO: remove backwards compatibility in later versions
13354 if "nodes" in rdict and "result" not in rdict:
13355 rdict["result"] = rdict["nodes"]
13358 for key in "success", "info", "result":
13359 if key not in rdict:
13360 raise errors.OpExecError("Can't parse iallocator results:"
13361 " missing key '%s'" % key)
13362 setattr(self, key, rdict[key])
13364 if not self._result_check(self.result):
13365 raise errors.OpExecError("Iallocator returned invalid result,"
13366 " expected %s, got %s" %
13367 (self._result_check, self.result),
13368 errors.ECODE_INVAL)
13370 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13371 assert self.relocate_from is not None
13372 assert self.required_nodes == 1
13374 node2group = dict((name, ndata["group"])
13375 for (name, ndata) in self.in_data["nodes"].items())
13377 fn = compat.partial(self._NodesToGroups, node2group,
13378 self.in_data["nodegroups"])
13380 instance = self.cfg.GetInstanceInfo(self.name)
13381 request_groups = fn(self.relocate_from + [instance.primary_node])
13382 result_groups = fn(rdict["result"] + [instance.primary_node])
13384 if self.success and not set(result_groups).issubset(request_groups):
13385 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13386 " differ from original groups (%s)" %
13387 (utils.CommaJoin(result_groups),
13388 utils.CommaJoin(request_groups)))
13390 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13391 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13393 self.out_data = rdict
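# A minimal well-formed allocator reply for an allocation request would
# therefore look like (illustrative only):
#
#   {"success": True, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# Replies from older allocators that use "nodes" instead of "result" are still
# accepted via the backwards-compatibility shim above.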
13396 def _NodesToGroups(node2group, groups, nodes):
13397 """Returns a list of unique group names for a list of nodes.
13399 @type node2group: dict
13400 @param node2group: Map from node name to group UUID
13402 @param groups: Group information
13404 @param nodes: Node names
13411 group_uuid = node2group[node]
13413 # Ignore unknown node
13417 group = groups[group_uuid]
13419 # Can't find group, let's use UUID
13420 group_name = group_uuid
13422 group_name = group["name"]
13424 result.add(group_name)
13426 return sorted(result)
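# Worked example (invented data): with
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
# the call _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
# returns ["default", "uuid-b"]: unknown nodes are ignored, and groups whose
# UUID is not in the groups mapping fall back to the UUID itself.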
13429 class LUTestAllocator(NoHooksLU):
13430 """Run allocator tests.
13432 This LU runs the allocator tests
13435 def CheckPrereq(self):
13436 """Check prerequisites.
13438 This checks the opcode parameters depending on the direction and mode of the test.
13441 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13442 for attr in ["memory", "disks", "disk_template",
13443 "os", "tags", "nics", "vcpus"]:
13444 if not hasattr(self.op, attr):
13445 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13446 attr, errors.ECODE_INVAL)
13447 iname = self.cfg.ExpandInstanceName(self.op.name)
13448 if iname is not None:
13449 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13450 iname, errors.ECODE_EXISTS)
13451 if not isinstance(self.op.nics, list):
13452 raise errors.OpPrereqError("Invalid parameter 'nics'",
13453 errors.ECODE_INVAL)
13454 if not isinstance(self.op.disks, list):
13455 raise errors.OpPrereqError("Invalid parameter 'disks'",
13456 errors.ECODE_INVAL)
13457 for row in self.op.disks:
13458 if (not isinstance(row, dict) or
13459 constants.IDISK_SIZE not in row or
13460 not isinstance(row[constants.IDISK_SIZE], int) or
13461 constants.IDISK_MODE not in row or
13462 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13463 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13464 " parameter", errors.ECODE_INVAL)
13465 if self.op.hypervisor is None:
13466 self.op.hypervisor = self.cfg.GetHypervisorType()
13467 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13468 fname = _ExpandInstanceName(self.cfg, self.op.name)
13469 self.op.name = fname
13470 self.relocate_from = \
13471 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13472 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13473 constants.IALLOCATOR_MODE_NODE_EVAC):
13474 if not self.op.instances:
13475 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13476 self.op.instances = _GetWantedInstances(self, self.op.instances)
13478 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13479 self.op.mode, errors.ECODE_INVAL)
13481 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13482 if self.op.allocator is None:
13483 raise errors.OpPrereqError("Missing allocator name",
13484 errors.ECODE_INVAL)
13485 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13486 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13487 self.op.direction, errors.ECODE_INVAL)
13489 def Exec(self, feedback_fn):
13490 """Run the allocator test.
13493 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13494 ial = IAllocator(self.cfg, self.rpc,
13497 memory=self.op.memory,
13498 disks=self.op.disks,
13499 disk_template=self.op.disk_template,
13503 vcpus=self.op.vcpus,
13504 hypervisor=self.op.hypervisor,
13506 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13507 ial = IAllocator(self.cfg, self.rpc,
13510 relocate_from=list(self.relocate_from),
13512 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13513 ial = IAllocator(self.cfg, self.rpc,
13515 instances=self.op.instances,
13516 target_groups=self.op.target_groups)
13517 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13518 ial = IAllocator(self.cfg, self.rpc,
13520 instances=self.op.instances,
13521 evac_mode=self.op.evac_mode)
13523 raise errors.ProgrammerError("Uncaught mode %s in"
13524 " LUTestAllocator.Exec", self.op.mode)
13526 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13527 result = ial.in_text
13529 ial.Run(self.op.allocator, validate=False)
13530 result = ial.out_text
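# Rough usage sketch (assuming the corresponding opcode class is
# opcodes.OpTestAllocator; values invented): submitting
#
#   opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                           mode=constants.IALLOCATOR_MODE_RELOC,
#                           name="inst1.example.com", allocator=None)
#
# only returns the generated request text, while IALLOCATOR_DIR_OUT runs the
# allocator named by "allocator" and returns its raw output.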
13534 #: Query type implementations
13536 constants.QR_INSTANCE: _InstanceQuery,
13537 constants.QR_NODE: _NodeQuery,
13538 constants.QR_GROUP: _GroupQuery,
13539 constants.QR_OS: _OsQuery,
13542 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13545 def _GetQueryImplementation(name):
13546 """Returns the implementation for a query type.
13548 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13552 return _QUERY_IMPL[name]
13554 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13555 errors.ECODE_INVAL)
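# Example (illustrative): query LUs presumably resolve their helper class via
# this function, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# while an unknown resource name raises OpPrereqError with ECODE_INVAL.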