code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70
  71 class ResultWithJobs:
  72   """Data container for LU results with jobs.
  73
  74   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  75   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  76   contained in the C{jobs} attribute and include the job IDs in the opcode
  77   result.
  78
  79   """
  80   def __init__(self, jobs, **kwargs):
  81     """Initializes this class.
  82
  83     Additional return values can be specified as keyword arguments.
  84
  85     @type jobs: list of lists of L{opcode.OpCode}
  86     @param jobs: A list of lists of opcode objects
  87
  88     """
  89     self.jobs = jobs
  90     self.other = kwargs
  91
  92
  93 class LogicalUnit(object):
  94   """Logical Unit base class.
  95
  96   Subclasses must follow these rules:
  97     - implement ExpandNames
  98     - implement CheckPrereq (except when tasklets are used)
  99     - implement Exec (except when tasklets are used)
 100     - implement BuildHooksEnv
 101     - implement BuildHooksNodes
 102     - redefine HPATH and HTYPE
 103     - optionally redefine their run requirements:
 104         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 105
 106   Note that all commands require root permissions.
 107
 108   @ivar dry_run_result: the value (if any) that will be returned to the caller
 109       in dry-run mode (signalled by opcode dry_run parameter)
 110
 111   """
 112   HPATH = None
 113   HTYPE = None
 114   REQ_BGL = True
 115
 116   def __init__(self, processor, op, context, rpc_runner):
 117     """Constructor for LogicalUnit.
 118
 119     This needs to be overridden in derived classes in order to check op
 120     validity.
 121
 122     """
 123     self.proc = processor
 124     self.op = op
 125     self.cfg = context.cfg
 126     self.glm = context.glm
 127     # readability alias
 128     self.owned_locks = context.glm.list_owned
 129     self.context = context
 130     self.rpc = rpc_runner
 131     # Dicts used to declare locking needs to mcpu
 132     self.needed_locks = None
 133     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 134     self.add_locks = {}
 135     self.remove_locks = {}
 136     # Used to force good behavior when calling helper functions
 137     self.recalculate_locks = {}
 138     # logging
 139     self.Log = processor.Log # pylint: disable=C0103
 140     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 141     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 142     self.LogStep = processor.LogStep # pylint: disable=C0103
 143     # support for dry-run
 144     self.dry_run_result = None
 145     # support for generic debug attribute
 146     if (not hasattr(self.op, "debug_level") or
 147         not isinstance(self.op.debug_level, int)):
 148       self.op.debug_level = 0
 149
 150     # Tasklets
 151     self.tasklets = None
 152
 153     # Validate opcode parameters and set defaults
 154     self.op.Validate(True)
 155
 156     self.CheckArguments()
 157
 158   def CheckArguments(self):
 159     """Check syntactic validity for the opcode arguments.
 160
 161     This method is for doing a simple syntactic check and ensure
 162     validity of opcode parameters, without any cluster-related
 163     checks. While the same can be accomplished in ExpandNames and/or
 164     CheckPrereq, doing these separate is better because:
 165
 166       - ExpandNames is left as as purely a lock-related function
 167       - CheckPrereq is run after we have acquired locks (and possible
 168         waited for them)
 169
 170     The function is allowed to change the self.op attribute so that
 171     later methods can no longer worry about missing parameters.
 172
 173     """
 174     pass
 175
 176   def ExpandNames(self):
 177     """Expand names for this LU.
 178
 179     This method is called before starting to execute the opcode, and it should
 180     update all the parameters of the opcode to their canonical form (e.g. a
 181     short node name must be fully expanded after this method has successfully
 182     completed). This way locking, hooks, logging, etc. can work correctly.
 183
 184     LUs which implement this method must also populate the self.needed_locks
 185     member, as a dict with lock levels as keys, and a list of needed lock names
 186     as values. Rules:
 187
 188       - use an empty dict if you don't need any lock
 189       - if you don't need any lock at a particular level omit that level
 190       - don't put anything for the BGL level
 191       - if you want all locks at a level use locking.ALL_SET as a value
 192
 193     If you need to share locks (rather than acquire them exclusively) at one
 194     level you can modify self.share_locks, setting a true value (usually 1) for
 195     that level. By default locks are not shared.
 196
 197     This function can also define a list of tasklets, which then will be
 198     executed in order instead of the usual LU-level CheckPrereq and Exec
 199     functions, if those are not defined by the LU.
 200
 201     Examples::
 202
 203       # Acquire all nodes and one instance
 204       self.needed_locks = {
 205         locking.LEVEL_NODE: locking.ALL_SET,
 206         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 207       }
 208       # Acquire just two nodes
 209       self.needed_locks = {
 210         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 211       }
 212       # Acquire no locks
 213       self.needed_locks = {} # No, you can't leave it to the default value None
 214
 215     """
 216     # The implementation of this method is mandatory only if the new LU is
 217     # concurrent, so that old LUs don't need to be changed all at the same
 218     # time.
 219     if self.REQ_BGL:
 220       self.needed_locks = {} # Exclusive LUs don't need locks.
 221     else:
 222       raise NotImplementedError
 223
 224   def DeclareLocks(self, level):
 225     """Declare LU locking needs for a level
 226
 227     While most LUs can just declare their locking needs at ExpandNames time,
 228     sometimes there's the need to calculate some locks after having acquired
 229     the ones before. This function is called just before acquiring locks at a
 230     particular level, but after acquiring the ones at lower levels, and permits
 231     such calculations. It can be used to modify self.needed_locks, and by
 232     default it does nothing.
 233
 234     This function is only called if you have something already set in
 235     self.needed_locks for the level.
 236
 237     @param level: Locking level which is going to be locked
 238     @type level: member of ganeti.locking.LEVELS
 239
 240     """
 241
 242   def CheckPrereq(self):
 243     """Check prerequisites for this LU.
 244
 245     This method should check that the prerequisites for the execution
 246     of this LU are fulfilled. It can do internode communication, but
 247     it should be idempotent - no cluster or system changes are
 248     allowed.
 249
 250     The method should raise errors.OpPrereqError in case something is
 251     not fulfilled. Its return value is ignored.
 252
 253     This method should also update all the parameters of the opcode to
 254     their canonical form if it hasn't been done by ExpandNames before.
 255
 256     """
 257     if self.tasklets is not None:
 258       for (idx, tl) in enumerate(self.tasklets):
 259         logging.debug("Checking prerequisites for tasklet %s/%s",
 260                       idx + 1, len(self.tasklets))
 261         tl.CheckPrereq()
 262     else:
 263       pass
 264
 265   def Exec(self, feedback_fn):
 266     """Execute the LU.
 267
 268     This method should implement the actual work. It should raise
 269     errors.OpExecError for failures that are somewhat dealt with in
 270     code, or expected.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 276         tl.Exec(feedback_fn)
 277     else:
 278       raise NotImplementedError
 279
 280   def BuildHooksEnv(self):
 281     """Build hooks environment for this LU.
 282
 283     @rtype: dict
 284     @return: Dictionary containing the environment that will be used for
 285       running the hooks for this LU. The keys of the dict must not be prefixed
 286       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 287       will extend the environment with additional variables. If no environment
 288       should be defined, an empty dictionary should be returned (not C{None}).
 289     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 290       will not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def BuildHooksNodes(self):
 296     """Build list of nodes to run LU's hooks.
 297
 298     @rtype: tuple; (list, list)
 299     @return: Tuple containing a list of node names on which the hook
 300       should run before the execution and a list of node names on which the
 301       hook should run after the execution. No nodes should be returned as an
 302       empty list (and not None).
 303     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 304       will not be called.
 305
 306     """
 307     raise NotImplementedError
 308
 309   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 310     """Notify the LU about the results of its hooks.
 311
 312     This method is called every time a hooks phase is executed, and notifies
 313     the Logical Unit about the hooks' result. The LU can then use it to alter
 314     its result based on the hooks.  By default the method does nothing and the
 315     previous result is passed back unchanged but any LU can define it if it
 316     wants to use the local cluster hook-scripts somehow.
 317
 318     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 319         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 320     @param hook_results: the results of the multi-node hooks rpc call
 321     @param feedback_fn: function used send feedback back to the caller
 322     @param lu_result: the previous Exec result this LU had, or None
 323         in the PRE phase
 324     @return: the new Exec result, based on the previous result
 325         and hook results
 326
 327     """
 328     # API must be kept, thus we ignore the unused argument and could
 329     # be a function warnings
 330     # pylint: disable=W0613,R0201
 331     return lu_result
 332
 333   def _ExpandAndLockInstance(self):
 334     """Helper function to expand and lock an instance.
 335
 336     Many LUs that work on an instance take its name in self.op.instance_name
 337     and need to expand it and then declare the expanded name for locking. This
 338     function does it, and then updates self.op.instance_name to the expanded
 339     name. It also initializes needed_locks as a dict, if this hasn't been done
 340     before.
 341
 342     """
 343     if self.needed_locks is None:
 344       self.needed_locks = {}
 345     else:
 346       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 347         "_ExpandAndLockInstance called with instance-level locks set"
 348     self.op.instance_name = _ExpandInstanceName(self.cfg,
 349                                                 self.op.instance_name)
 350     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 351
 352   def _LockInstancesNodes(self, primary_only=False,
 353                           level=locking.LEVEL_NODE):
 354     """Helper function to declare instances' nodes for locking.
 355
 356     This function should be called after locking one or more instances to lock
 357     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 358     with all primary or secondary nodes for instances already locked and
 359     present in self.needed_locks[locking.LEVEL_INSTANCE].
 360
 361     It should be called from DeclareLocks, and for safety only works if
 362     self.recalculate_locks[locking.LEVEL_NODE] is set.
 363
 364     In the future it may grow parameters to just lock some instance's nodes, or
 365     to just lock primaries or secondary nodes, if needed.
 366
 367     If should be called in DeclareLocks in a way similar to::
 368
 369       if level == locking.LEVEL_NODE:
 370         self._LockInstancesNodes()
 371
 372     @type primary_only: boolean
 373     @param primary_only: only lock primary nodes of locked instances
 374     @param level: Which lock level to use for locking nodes
 375
 376     """
 377     assert level in self.recalculate_locks, \
 378       "_LockInstancesNodes helper function called with no nodes to recalculate"
 379
 380     # TODO: check if we're really been called with the instance locks held
 381
 382     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 383     # future we might want to have different behaviors depending on the value
 384     # of self.recalculate_locks[locking.LEVEL_NODE]
 385     wanted_nodes = []
 386     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 387     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 388       wanted_nodes.append(instance.primary_node)
 389       if not primary_only:
 390         wanted_nodes.extend(instance.secondary_nodes)
 391
 392     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 393       self.needed_locks[level] = wanted_nodes
 394     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 395       self.needed_locks[level].extend(wanted_nodes)
 396     else:
 397       raise errors.ProgrammerError("Unknown recalculation mode")
 398
 399     del self.recalculate_locks[level]
 400
 401
 402 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 403   """Simple LU which runs no hooks.
 404
 405   This LU is intended as a parent for other LogicalUnits which will
 406   run no hooks, in order to reduce duplicate code.
 407
 408   """
 409   HPATH = None
 410   HTYPE = None
 411
 412   def BuildHooksEnv(self):
 413     """Empty BuildHooksEnv for NoHooksLu.
 414
 415     This just raises an error.
 416
 417     """
 418     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 419
 420   def BuildHooksNodes(self):
 421     """Empty BuildHooksNodes for NoHooksLU.
 422
 423     """
 424     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 425
 426
 427 class Tasklet:
 428   """Tasklet base class.
 429
 430   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 431   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 432   tasklets know nothing about locks.
 433
 434   Subclasses must follow these rules:
 435     - Implement CheckPrereq
 436     - Implement Exec
 437
 438   """
 439   def __init__(self, lu):
 440     self.lu = lu
 441
 442     # Shortcuts
 443     self.cfg = lu.cfg
 444     self.rpc = lu.rpc
 445
 446   def CheckPrereq(self):
 447     """Check prerequisites for this tasklets.
 448
 449     This method should check whether the prerequisites for the execution of
 450     this tasklet are fulfilled. It can do internode communication, but it
 451     should be idempotent - no cluster or system changes are allowed.
 452
 453     The method should raise errors.OpPrereqError in case something is not
 454     fulfilled. Its return value is ignored.
 455
 456     This method should also update all parameters to their canonical form if it
 457     hasn't been done before.
 458
 459     """
 460     pass
 461
 462   def Exec(self, feedback_fn):
 463     """Execute the tasklet.
 464
 465     This method should implement the actual work. It should raise
 466     errors.OpExecError for failures that are somewhat dealt with in code, or
 467     expected.
 468
 469     """
 470     raise NotImplementedError
 471
 472
 473 class _QueryBase:
 474   """Base for query utility classes.
 475
 476   """
 477   #: Attribute holding field definitions
 478   FIELDS = None
 479
 480   def __init__(self, qfilter, fields, use_locking):
 481     """Initializes this class.
 482
 483     """
 484     self.use_locking = use_locking
 485
 486     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 487                              namefield="name")
 488     self.requested_data = self.query.RequestedData()
 489     self.names = self.query.RequestedNames()
 490
 491     # Sort only if no names were requested
 492     self.sort_by_name = not self.names
 493
 494     self.do_locking = None
 495     self.wanted = None
 496
 497   def _GetNames(self, lu, all_names, lock_level):
 498     """Helper function to determine names asked for in the query.
 499
 500     """
 501     if self.do_locking:
 502       names = lu.owned_locks(lock_level)
 503     else:
 504       names = all_names
 505
 506     if self.wanted == locking.ALL_SET:
 507       assert not self.names
 508       # caller didn't specify names, so ordering is not important
 509       return utils.NiceSort(names)
 510
 511     # caller specified names and we must keep the same order
 512     assert self.names
 513     assert not self.do_locking or lu.glm.is_owned(lock_level)
 514
 515     missing = set(self.wanted).difference(names)
 516     if missing:
 517       raise errors.OpExecError("Some items were removed before retrieving"
 518                                " their data: %s" % missing)
 519
 520     # Return expanded names
 521     return self.wanted
 522
 523   def ExpandNames(self, lu):
 524     """Expand names for this query.
 525
 526     See L{LogicalUnit.ExpandNames}.
 527
 528     """
 529     raise NotImplementedError()
 530
 531   def DeclareLocks(self, lu, level):
 532     """Declare locks for this query.
 533
 534     See L{LogicalUnit.DeclareLocks}.
 535
 536     """
 537     raise NotImplementedError()
 538
 539   def _GetQueryData(self, lu):
 540     """Collects all data for this query.
 541
 542     @return: Query data object
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def NewStyleQuery(self, lu):
 548     """Collect data and execute query.
 549
 550     """
 551     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 552                                   sort_by_name=self.sort_by_name)
 553
 554   def OldStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return self.query.OldStyleQuery(self._GetQueryData(lu),
 559                                     sort_by_name=self.sort_by_name)
 560
 561
 562 def _ShareAll():
 563   """Returns a dict declaring all lock levels shared.
 564
 565   """
 566   return dict.fromkeys(locking.LEVELS, 1)
 567
 568
 569 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 570   """Checks if the owned node groups are still correct for an instance.
 571
 572   @type cfg: L{config.ConfigWriter}
 573   @param cfg: The cluster configuration
 574   @type instance_name: string
 575   @param instance_name: Instance name
 576   @type owned_groups: set or frozenset
 577   @param owned_groups: List of currently owned node groups
 578
 579   """
 580   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 581
 582   if not owned_groups.issuperset(inst_groups):
 583     raise errors.OpPrereqError("Instance %s's node groups changed since"
 584                                " locks were acquired, current groups are"
 585                                " are '%s', owning groups '%s'; retry the"
 586                                " operation" %
 587                                (instance_name,
 588                                 utils.CommaJoin(inst_groups),
 589                                 utils.CommaJoin(owned_groups)),
 590                                errors.ECODE_STATE)
 591
 592   return inst_groups
 593
 594
 595 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 596   """Checks if the instances in a node group are still correct.
 597
 598   @type cfg: L{config.ConfigWriter}
 599   @param cfg: The cluster configuration
 600   @type group_uuid: string
 601   @param group_uuid: Node group UUID
 602   @type owned_instances: set or frozenset
 603   @param owned_instances: List of currently owned instances
 604
 605   """
 606   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 607   if owned_instances != wanted_instances:
 608     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 609                                " locks were acquired, wanted '%s', have '%s';"
 610                                " retry the operation" %
 611                                (group_uuid,
 612                                 utils.CommaJoin(wanted_instances),
 613                                 utils.CommaJoin(owned_instances)),
 614                                errors.ECODE_STATE)
 615
 616   return wanted_instances
 617
 618
 619 def _SupportsOob(cfg, node):
 620   """Tells if node supports OOB.
 621
 622   @type cfg: L{config.ConfigWriter}
 623   @param cfg: The cluster configuration
 624   @type node: L{objects.Node}
 625   @param node: The node
 626   @return: The OOB script if supported or an empty string otherwise
 627
 628   """
 629   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 630
 631
 632 def _GetWantedNodes(lu, nodes):
 633   """Returns list of checked and expanded node names.
 634
 635   @type lu: L{LogicalUnit}
 636   @param lu: the logical unit on whose behalf we execute
 637   @type nodes: list
 638   @param nodes: list of node names or None for all nodes
 639   @rtype: list
 640   @return: the list of nodes, sorted
 641   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 642
 643   """
 644   if nodes:
 645     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 646
 647   return utils.NiceSort(lu.cfg.GetNodeList())
 648
 649
 650 def _GetWantedInstances(lu, instances):
 651   """Returns list of checked and expanded instance names.
 652
 653   @type lu: L{LogicalUnit}
 654   @param lu: the logical unit on whose behalf we execute
 655   @type instances: list
 656   @param instances: list of instance names or None for all instances
 657   @rtype: list
 658   @return: the list of instances, sorted
 659   @raise errors.OpPrereqError: if the instances parameter is wrong type
 660   @raise errors.OpPrereqError: if any of the passed instances is not found
 661
 662   """
 663   if instances:
 664     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 665   else:
 666     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 667   return wanted
 668
 669
 670 def _GetUpdatedParams(old_params, update_dict,
 671                       use_default=True, use_none=False):
 672   """Return the new version of a parameter dictionary.
 673
 674   @type old_params: dict
 675   @param old_params: old parameters
 676   @type update_dict: dict
 677   @param update_dict: dict containing new parameter values, or
 678       constants.VALUE_DEFAULT to reset the parameter to its default
 679       value
 680   @param use_default: boolean
 681   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 682       values as 'to be deleted' values
 683   @param use_none: boolean
 684   @type use_none: whether to recognise C{None} values as 'to be
 685       deleted' values
 686   @rtype: dict
 687   @return: the new parameter dictionary
 688
 689   """
 690   params_copy = copy.deepcopy(old_params)
 691   for key, val in update_dict.iteritems():
 692     if ((use_default and val == constants.VALUE_DEFAULT) or
 693         (use_none and val is None)):
 694       try:
 695         del params_copy[key]
 696       except KeyError:
 697         pass
 698     else:
 699       params_copy[key] = val
 700   return params_copy
 701
 702
 703 def _ReleaseLocks(lu, level, names=None, keep=None):
 704   """Releases locks owned by an LU.
 705
 706   @type lu: L{LogicalUnit}
 707   @param level: Lock level
 708   @type names: list or None
 709   @param names: Names of locks to release
 710   @type keep: list or None
 711   @param keep: Names of locks to retain
 712
 713   """
 714   assert not (keep is not None and names is not None), \
 715          "Only one of the 'names' and the 'keep' parameters can be given"
 716
 717   if names is not None:
 718     should_release = names.__contains__
 719   elif keep:
 720     should_release = lambda name: name not in keep
 721   else:
 722     should_release = None
 723
 724   if should_release:
 725     retain = []
 726     release = []
 727
 728     # Determine which locks to release
 729     for name in lu.owned_locks(level):
 730       if should_release(name):
 731         release.append(name)
 732       else:
 733         retain.append(name)
 734
 735     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 736
 737     # Release just some locks
 738     lu.glm.release(level, names=release)
 739
 740     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 741   else:
 742     # Release everything
 743     lu.glm.release(level)
 744
 745     assert not lu.glm.is_owned(level), "No locks should be owned"
 746
 747
 748 def _MapInstanceDisksToNodes(instances):
 749   """Creates a map from (node, volume) to instance name.
 750
 751   @type instances: list of L{objects.Instance}
 752   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 753
 754   """
 755   return dict(((node, vol), inst.name)
 756               for inst in instances
 757               for (node, vols) in inst.MapLVsByNode().items()
 758               for vol in vols)
 759
 760
 761 def _RunPostHook(lu, node_name):
 762   """Runs the post-hook for an opcode on a single node.
 763
 764   """
 765   hm = lu.proc.BuildHooksManager(lu)
 766   try:
 767     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 768   except:
 769     # pylint: disable=W0702
 770     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 771
 772
 773 def _CheckOutputFields(static, dynamic, selected):
 774   """Checks whether all selected fields are valid.
 775
 776   @type static: L{utils.FieldSet}
 777   @param static: static fields set
 778   @type dynamic: L{utils.FieldSet}
 779   @param dynamic: dynamic fields set
 780
 781   """
 782   f = utils.FieldSet()
 783   f.Extend(static)
 784   f.Extend(dynamic)
 785
 786   delta = f.NonMatching(selected)
 787   if delta:
 788     raise errors.OpPrereqError("Unknown output fields selected: %s"
 789                                % ",".join(delta), errors.ECODE_INVAL)
 790
 791
 792 def _CheckGlobalHvParams(params):
 793   """Validates that given hypervisor params are not global ones.
 794
 795   This will ensure that instances don't get customised versions of
 796   global params.
 797
 798   """
 799   used_globals = constants.HVC_GLOBALS.intersection(params)
 800   if used_globals:
 801     msg = ("The following hypervisor parameters are global and cannot"
 802            " be customized at instance level, please modify them at"
 803            " cluster level: %s" % utils.CommaJoin(used_globals))
 804     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 805
 806
 807 def _CheckNodeOnline(lu, node, msg=None):
 808   """Ensure that a given node is online.
 809
 810   @param lu: the LU on behalf of which we make the check
 811   @param node: the node to check
 812   @param msg: if passed, should be a message to replace the default one
 813   @raise errors.OpPrereqError: if the node is offline
 814
 815   """
 816   if msg is None:
 817     msg = "Can't use offline node"
 818   if lu.cfg.GetNodeInfo(node).offline:
 819     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 820
 821
 822 def _CheckNodeNotDrained(lu, node):
 823   """Ensure that a given node is not drained.
 824
 825   @param lu: the LU on behalf of which we make the check
 826   @param node: the node to check
 827   @raise errors.OpPrereqError: if the node is drained
 828
 829   """
 830   if lu.cfg.GetNodeInfo(node).drained:
 831     raise errors.OpPrereqError("Can't use drained node %s" % node,
 832                                errors.ECODE_STATE)
 833
 834
 835 def _CheckNodeVmCapable(lu, node):
 836   """Ensure that a given node is vm capable.
 837
 838   @param lu: the LU on behalf of which we make the check
 839   @param node: the node to check
 840   @raise errors.OpPrereqError: if the node is not vm capable
 841
 842   """
 843   if not lu.cfg.GetNodeInfo(node).vm_capable:
 844     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 845                                errors.ECODE_STATE)
 846
 847
 848 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 849   """Ensure that a node supports a given OS.
 850
 851   @param lu: the LU on behalf of which we make the check
 852   @param node: the node to check
 853   @param os_name: the OS to query about
 854   @param force_variant: whether to ignore variant errors
 855   @raise errors.OpPrereqError: if the node is not supporting the OS
 856
 857   """
 858   result = lu.rpc.call_os_get(node, os_name)
 859   result.Raise("OS '%s' not in supported OS list for node %s" %
 860                (os_name, node),
 861                prereq=True, ecode=errors.ECODE_INVAL)
 862   if not force_variant:
 863     _CheckOSVariant(result.payload, os_name)
 864
 865
 866 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 867   """Ensure that a node has the given secondary ip.
 868
 869   @type lu: L{LogicalUnit}
 870   @param lu: the LU on behalf of which we make the check
 871   @type node: string
 872   @param node: the node to check
 873   @type secondary_ip: string
 874   @param secondary_ip: the ip to check
 875   @type prereq: boolean
 876   @param prereq: whether to throw a prerequisite or an execute error
 877   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 878   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 879
 880   """
 881   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 882   result.Raise("Failure checking secondary ip on node %s" % node,
 883                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 884   if not result.payload:
 885     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 886            " please fix and re-run this command" % secondary_ip)
 887     if prereq:
 888       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 889     else:
 890       raise errors.OpExecError(msg)
 891
 892
 893 def _GetClusterDomainSecret():
 894   """Reads the cluster domain secret.
 895
 896   """
 897   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 898                                strict=True)
 899
 900
 901 def _CheckInstanceDown(lu, instance, reason):
 902   """Ensure that an instance is not running."""
 903   if instance.admin_up:
 904     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 905                                (instance.name, reason), errors.ECODE_STATE)
 906
 907   pnode = instance.primary_node
 908   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 909   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 910               prereq=True, ecode=errors.ECODE_ENVIRON)
 911
 912   if instance.name in ins_l.payload:
 913     raise errors.OpPrereqError("Instance %s is running, %s" %
 914                                (instance.name, reason), errors.ECODE_STATE)
 915
 916
 917 def _ExpandItemName(fn, name, kind):
 918   """Expand an item name.
 919
 920   @param fn: the function to use for expansion
 921   @param name: requested item name
 922   @param kind: text description ('Node' or 'Instance')
 923   @return: the resolved (full) name
 924   @raise errors.OpPrereqError: if the item is not found
 925
 926   """
 927   full_name = fn(name)
 928   if full_name is None:
 929     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 930                                errors.ECODE_NOENT)
 931   return full_name
 932
 933
 934 def _ExpandNodeName(cfg, name):
 935   """Wrapper over L{_ExpandItemName} for nodes."""
 936   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 937
 938
 939 def _ExpandInstanceName(cfg, name):
 940   """Wrapper over L{_ExpandItemName} for instance."""
 941   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 942
 943
 944 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 945                           memory, vcpus, nics, disk_template, disks,
 946                           bep, hvp, hypervisor_name, tags):
 947   """Builds instance related env variables for hooks
 948
 949   This builds the hook environment from individual variables.
 950
 951   @type name: string
 952   @param name: the name of the instance
 953   @type primary_node: string
 954   @param primary_node: the name of the instance's primary node
 955   @type secondary_nodes: list
 956   @param secondary_nodes: list of secondary nodes as strings
 957   @type os_type: string
 958   @param os_type: the name of the instance's OS
 959   @type status: boolean
 960   @param status: the should_run status of the instance
 961   @type memory: string
 962   @param memory: the memory size of the instance
 963   @type vcpus: string
 964   @param vcpus: the count of VCPUs the instance has
 965   @type nics: list
 966   @param nics: list of tuples (ip, mac, mode, link) representing
 967       the NICs the instance has
 968   @type disk_template: string
 969   @param disk_template: the disk template of the instance
 970   @type disks: list
 971   @param disks: the list of (size, mode) pairs
 972   @type bep: dict
 973   @param bep: the backend parameters for the instance
 974   @type hvp: dict
 975   @param hvp: the hypervisor parameters for the instance
 976   @type hypervisor_name: string
 977   @param hypervisor_name: the hypervisor for the instance
 978   @type tags: list
 979   @param tags: list of instance tags as strings
 980   @rtype: dict
 981   @return: the hook environment for this instance
 982
 983   """
 984   if status:
 985     str_status = "up"
 986   else:
 987     str_status = "down"
 988   env = {
 989     "OP_TARGET": name,
 990     "INSTANCE_NAME": name,
 991     "INSTANCE_PRIMARY": primary_node,
 992     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 993     "INSTANCE_OS_TYPE": os_type,
 994     "INSTANCE_STATUS": str_status,
 995     "INSTANCE_MEMORY": memory,
 996     "INSTANCE_VCPUS": vcpus,
 997     "INSTANCE_DISK_TEMPLATE": disk_template,
 998     "INSTANCE_HYPERVISOR": hypervisor_name,
 999   }
1000
1001   if nics:
1002     nic_count = len(nics)
1003     for idx, (ip, mac, mode, link) in enumerate(nics):
1004       if ip is None:
1005         ip = ""
1006       env["INSTANCE_NIC%d_IP" % idx] = ip
1007       env["INSTANCE_NIC%d_MAC" % idx] = mac
1008       env["INSTANCE_NIC%d_MODE" % idx] = mode
1009       env["INSTANCE_NIC%d_LINK" % idx] = link
1010       if mode == constants.NIC_MODE_BRIDGED:
1011         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1012   else:
1013     nic_count = 0
1014
1015   env["INSTANCE_NIC_COUNT"] = nic_count
1016
1017   if disks:
1018     disk_count = len(disks)
1019     for idx, (size, mode) in enumerate(disks):
1020       env["INSTANCE_DISK%d_SIZE" % idx] = size
1021       env["INSTANCE_DISK%d_MODE" % idx] = mode
1022   else:
1023     disk_count = 0
1024
1025   env["INSTANCE_DISK_COUNT"] = disk_count
1026
1027   if not tags:
1028     tags = []
1029
1030   env["INSTANCE_TAGS"] = " ".join(tags)
1031
1032   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1033     for key, value in source.items():
1034       env["INSTANCE_%s_%s" % (kind, key)] = value
1035
1036   return env
1037
1038
1039 def _NICListToTuple(lu, nics):
1040   """Build a list of nic information tuples.
1041
1042   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1043   value in LUInstanceQueryData.
1044
1045   @type lu:  L{LogicalUnit}
1046   @param lu: the logical unit on whose behalf we execute
1047   @type nics: list of L{objects.NIC}
1048   @param nics: list of nics to convert to hooks tuples
1049
1050   """
1051   hooks_nics = []
1052   cluster = lu.cfg.GetClusterInfo()
1053   for nic in nics:
1054     ip = nic.ip
1055     mac = nic.mac
1056     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1057     mode = filled_params[constants.NIC_MODE]
1058     link = filled_params[constants.NIC_LINK]
1059     hooks_nics.append((ip, mac, mode, link))
1060   return hooks_nics
1061
1062
1063 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1064   """Builds instance related env variables for hooks from an object.
1065
1066   @type lu: L{LogicalUnit}
1067   @param lu: the logical unit on whose behalf we execute
1068   @type instance: L{objects.Instance}
1069   @param instance: the instance for which we should build the
1070       environment
1071   @type override: dict
1072   @param override: dictionary with key/values that will override
1073       our values
1074   @rtype: dict
1075   @return: the hook environment dictionary
1076
1077   """
1078   cluster = lu.cfg.GetClusterInfo()
1079   bep = cluster.FillBE(instance)
1080   hvp = cluster.FillHV(instance)
1081   args = {
1082     "name": instance.name,
1083     "primary_node": instance.primary_node,
1084     "secondary_nodes": instance.secondary_nodes,
1085     "os_type": instance.os,
1086     "status": instance.admin_up,
1087     "memory": bep[constants.BE_MEMORY],
1088     "vcpus": bep[constants.BE_VCPUS],
1089     "nics": _NICListToTuple(lu, instance.nics),
1090     "disk_template": instance.disk_template,
1091     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1092     "bep": bep,
1093     "hvp": hvp,
1094     "hypervisor_name": instance.hypervisor,
1095     "tags": instance.tags,
1096   }
1097   if override:
1098     args.update(override)
1099   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1100
1101
1102 def _AdjustCandidatePool(lu, exceptions):
1103   """Adjust the candidate pool after node operations.
1104
1105   """
1106   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1107   if mod_list:
1108     lu.LogInfo("Promoted nodes to master candidate role: %s",
1109                utils.CommaJoin(node.name for node in mod_list))
1110     for name in mod_list:
1111       lu.context.ReaddNode(name)
1112   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1113   if mc_now > mc_max:
1114     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1115                (mc_now, mc_max))
1116
1117
1118 def _DecideSelfPromotion(lu, exceptions=None):
1119   """Decide whether I should promote myself as a master candidate.
1120
1121   """
1122   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1123   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1124   # the new node will increase mc_max with one, so:
1125   mc_should = min(mc_should + 1, cp_size)
1126   return mc_now < mc_should
1127
1128
1129 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1130   """Check that the brigdes needed by a list of nics exist.
1131
1132   """
1133   cluster = lu.cfg.GetClusterInfo()
1134   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1135   brlist = [params[constants.NIC_LINK] for params in paramslist
1136             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1137   if brlist:
1138     result = lu.rpc.call_bridges_exist(target_node, brlist)
1139     result.Raise("Error checking bridges on destination node '%s'" %
1140                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1141
1142
1143 def _CheckInstanceBridgesExist(lu, instance, node=None):
1144   """Check that the brigdes needed by an instance exist.
1145
1146   """
1147   if node is None:
1148     node = instance.primary_node
1149   _CheckNicsBridgesExist(lu, instance.nics, node)
1150
1151
1152 def _CheckOSVariant(os_obj, name):
1153   """Check whether an OS name conforms to the os variants specification.
1154
1155   @type os_obj: L{objects.OS}
1156   @param os_obj: OS object to check
1157   @type name: string
1158   @param name: OS name passed by the user, to check for validity
1159
1160   """
1161   variant = objects.OS.GetVariant(name)
1162   if not os_obj.supported_variants:
1163     if variant:
1164       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1165                                  " passed)" % (os_obj.name, variant),
1166                                  errors.ECODE_INVAL)
1167     return
1168   if not variant:
1169     raise errors.OpPrereqError("OS name must include a variant",
1170                                errors.ECODE_INVAL)
1171
1172   if variant not in os_obj.supported_variants:
1173     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1174
1175
1176 def _GetNodeInstancesInner(cfg, fn):
1177   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1178
1179
1180 def _GetNodeInstances(cfg, node_name):
1181   """Returns a list of all primary and secondary instances on a node.
1182
1183   """
1184
1185   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1186
1187
1188 def _GetNodePrimaryInstances(cfg, node_name):
1189   """Returns primary instances on a node.
1190
1191   """
1192   return _GetNodeInstancesInner(cfg,
1193                                 lambda inst: node_name == inst.primary_node)
1194
1195
1196 def _GetNodeSecondaryInstances(cfg, node_name):
1197   """Returns secondary instances on a node.
1198
1199   """
1200   return _GetNodeInstancesInner(cfg,
1201                                 lambda inst: node_name in inst.secondary_nodes)
1202
1203
1204 def _GetStorageTypeArgs(cfg, storage_type):
1205   """Returns the arguments for a storage type.
1206
1207   """
1208   # Special case for file storage
1209   if storage_type == constants.ST_FILE:
1210     # storage.FileStorage wants a list of storage directories
1211     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212
1213   return []
1214
1215
1216 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1217   faulty = []
1218
1219   for dev in instance.disks:
1220     cfg.SetDiskID(dev, node_name)
1221
1222   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1223   result.Raise("Failed to get disk status from node %s" % node_name,
1224                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1225
1226   for idx, bdev_status in enumerate(result.payload):
1227     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1228       faulty.append(idx)
1229
1230   return faulty
1231
1232
1233 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1234   """Check the sanity of iallocator and node arguments and use the
1235   cluster-wide iallocator if appropriate.
1236
1237   Check that at most one of (iallocator, node) is specified. If none is
1238   specified, then the LU's opcode's iallocator slot is filled with the
1239   cluster-wide default iallocator.
1240
1241   @type iallocator_slot: string
1242   @param iallocator_slot: the name of the opcode iallocator slot
1243   @type node_slot: string
1244   @param node_slot: the name of the opcode target node slot
1245
1246   """
1247   node = getattr(lu.op, node_slot, None)
1248   iallocator = getattr(lu.op, iallocator_slot, None)
1249
1250   if node is not None and iallocator is not None:
1251     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1252                                errors.ECODE_INVAL)
1253   elif node is None and iallocator is None:
1254     default_iallocator = lu.cfg.GetDefaultIAllocator()
1255     if default_iallocator:
1256       setattr(lu.op, iallocator_slot, default_iallocator)
1257     else:
1258       raise errors.OpPrereqError("No iallocator or node given and no"
1259                                  " cluster-wide default iallocator found;"
1260                                  " please specify either an iallocator or a"
1261                                  " node, or set a cluster-wide default"
1262                                  " iallocator")
1263
1264
1265 def _GetDefaultIAllocator(cfg, iallocator):
1266   """Decides on which iallocator to use.
1267
1268   @type cfg: L{config.ConfigWriter}
1269   @param cfg: Cluster configuration object
1270   @type iallocator: string or None
1271   @param iallocator: Iallocator specified in opcode
1272   @rtype: string
1273   @return: Iallocator name
1274
1275   """
1276   if not iallocator:
1277     # Use default iallocator
1278     iallocator = cfg.GetDefaultIAllocator()
1279
1280   if not iallocator:
1281     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1282                                " opcode nor as a cluster-wide default",
1283                                errors.ECODE_INVAL)
1284
1285   return iallocator
1286
1287
1288 class LUClusterPostInit(LogicalUnit):
1289   """Logical unit for running hooks after cluster initialization.
1290
1291   """
1292   HPATH = "cluster-init"
1293   HTYPE = constants.HTYPE_CLUSTER
1294
1295   def BuildHooksEnv(self):
1296     """Build hooks env.
1297
1298     """
1299     return {
1300       "OP_TARGET": self.cfg.GetClusterName(),
1301       }
1302
1303   def BuildHooksNodes(self):
1304     """Build hooks nodes.
1305
1306     """
1307     return ([], [self.cfg.GetMasterNode()])
1308
1309   def Exec(self, feedback_fn):
1310     """Nothing to do.
1311
1312     """
1313     return True
1314
1315
1316 class LUClusterDestroy(LogicalUnit):
1317   """Logical unit for destroying the cluster.
1318
1319   """
1320   HPATH = "cluster-destroy"
1321   HTYPE = constants.HTYPE_CLUSTER
1322
1323   def BuildHooksEnv(self):
1324     """Build hooks env.
1325
1326     """
1327     return {
1328       "OP_TARGET": self.cfg.GetClusterName(),
1329       }
1330
1331   def BuildHooksNodes(self):
1332     """Build hooks nodes.
1333
1334     """
1335     return ([], [])
1336
1337   def CheckPrereq(self):
1338     """Check prerequisites.
1339
1340     This checks whether the cluster is empty.
1341
1342     Any errors are signaled by raising errors.OpPrereqError.
1343
1344     """
1345     master = self.cfg.GetMasterNode()
1346
1347     nodelist = self.cfg.GetNodeList()
1348     if len(nodelist) != 1 or nodelist[0] != master:
1349       raise errors.OpPrereqError("There are still %d node(s) in"
1350                                  " this cluster." % (len(nodelist) - 1),
1351                                  errors.ECODE_INVAL)
1352     instancelist = self.cfg.GetInstanceList()
1353     if instancelist:
1354       raise errors.OpPrereqError("There are still %d instance(s) in"
1355                                  " this cluster." % len(instancelist),
1356                                  errors.ECODE_INVAL)
1357
1358   def Exec(self, feedback_fn):
1359     """Destroys the cluster.
1360
1361     """
1362     master_params = self.cfg.GetMasterNetworkParameters()
1363
1364     # Run post hooks on master node before it's removed
1365     _RunPostHook(self, master_params.name)
1366
1367     ems = self.cfg.GetUseExternalMipScript()
1368     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1369                                                      master_params, ems)
1370     result.Raise("Could not disable the master role")
1371
1372     return master_params.name
1373
1374
1375 def _VerifyCertificate(filename):
1376   """Verifies a certificate for L{LUClusterVerifyConfig}.
1377
1378   @type filename: string
1379   @param filename: Path to PEM file
1380
1381   """
1382   try:
1383     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1384                                            utils.ReadFile(filename))
1385   except Exception, err: # pylint: disable=W0703
1386     return (LUClusterVerifyConfig.ETYPE_ERROR,
1387             "Failed to load X509 certificate %s: %s" % (filename, err))
1388
1389   (errcode, msg) = \
1390     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1391                                 constants.SSL_CERT_EXPIRATION_ERROR)
1392
1393   if msg:
1394     fnamemsg = "While verifying %s: %s" % (filename, msg)
1395   else:
1396     fnamemsg = None
1397
1398   if errcode is None:
1399     return (None, fnamemsg)
1400   elif errcode == utils.CERT_WARNING:
1401     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1402   elif errcode == utils.CERT_ERROR:
1403     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1404
1405   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1406
1407
1408 def _GetAllHypervisorParameters(cluster, instances):
1409   """Compute the set of all hypervisor parameters.
1410
1411   @type cluster: L{objects.Cluster}
1412   @param cluster: the cluster object
1413   @param instances: list of L{objects.Instance}
1414   @param instances: additional instances from which to obtain parameters
1415   @rtype: list of (origin, hypervisor, parameters)
1416   @return: a list with all parameters found, indicating the hypervisor they
1417        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1418
1419   """
1420   hvp_data = []
1421
1422   for hv_name in cluster.enabled_hypervisors:
1423     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1424
1425   for os_name, os_hvp in cluster.os_hvp.items():
1426     for hv_name, hv_params in os_hvp.items():
1427       if hv_params:
1428         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1429         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1430
1431   # TODO: collapse identical parameter values in a single one
1432   for instance in instances:
1433     if instance.hvparams:
1434       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1435                        cluster.FillHV(instance)))
1436
1437   return hvp_data
1438
1439
1440 class _VerifyErrors(object):
1441   """Mix-in for cluster/group verify LUs.
1442
1443   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1444   self.op and self._feedback_fn to be available.)
1445
1446   """
1447
1448   ETYPE_FIELD = "code"
1449   ETYPE_ERROR = "ERROR"
1450   ETYPE_WARNING = "WARNING"
1451
1452   def _Error(self, ecode, item, msg, *args, **kwargs):
1453     """Format an error message.
1454
1455     Based on the opcode's error_codes parameter, either format a
1456     parseable error code, or a simpler error string.
1457
1458     This must be called only from Exec and functions called from Exec.
1459
1460     """
1461     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1462     itype, etxt, _ = ecode
1463     # first complete the msg
1464     if args:
1465       msg = msg % args
1466     # then format the whole message
1467     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1468       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1469     else:
1470       if item:
1471         item = " " + item
1472       else:
1473         item = ""
1474       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1475     # and finally report it via the feedback_fn
1476     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1477
1478   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1479     """Log an error message if the passed condition is True.
1480
1481     """
1482     cond = (bool(cond)
1483             or self.op.debug_simulate_errors) # pylint: disable=E1101
1484
1485     # If the error code is in the list of ignored errors, demote the error to a
1486     # warning
1487     (_, etxt, _) = ecode
1488     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1489       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1490
1491     if cond:
1492       self._Error(ecode, *args, **kwargs)
1493
1494     # do not mark the operation as failed for WARN cases only
1495     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1496       self.bad = self.bad or cond
1497
1498
1499 class LUClusterVerify(NoHooksLU):
1500   """Submits all jobs necessary to verify the cluster.
1501
1502   """
1503   REQ_BGL = False
1504
1505   def ExpandNames(self):
1506     self.needed_locks = {}
1507
1508   def Exec(self, feedback_fn):
1509     jobs = []
1510
1511     if self.op.group_name:
1512       groups = [self.op.group_name]
1513       depends_fn = lambda: None
1514     else:
1515       groups = self.cfg.GetNodeGroupList()
1516
1517       # Verify global configuration
1518       jobs.append([
1519         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1520         ])
1521
1522       # Always depend on global verification
1523       depends_fn = lambda: [(-len(jobs), [])]
1524
1525     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1526                                             ignore_errors=self.op.ignore_errors,
1527                                             depends=depends_fn())]
1528                 for group in groups)
1529
1530     # Fix up all parameters
1531     for op in itertools.chain(*jobs): # pylint: disable=W0142
1532       op.debug_simulate_errors = self.op.debug_simulate_errors
1533       op.verbose = self.op.verbose
1534       op.error_codes = self.op.error_codes
1535       try:
1536         op.skip_checks = self.op.skip_checks
1537       except AttributeError:
1538         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1539
1540     return ResultWithJobs(jobs)
1541
1542
1543 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1544   """Verifies the cluster config.
1545
1546   """
1547   REQ_BGL = True
1548
1549   def _VerifyHVP(self, hvp_data):
1550     """Verifies locally the syntax of the hypervisor parameters.
1551
1552     """
1553     for item, hv_name, hv_params in hvp_data:
1554       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1555              (item, hv_name))
1556       try:
1557         hv_class = hypervisor.GetHypervisor(hv_name)
1558         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1559         hv_class.CheckParameterSyntax(hv_params)
1560       except errors.GenericError, err:
1561         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1562
1563   def ExpandNames(self):
1564     # Information can be safely retrieved as the BGL is acquired in exclusive
1565     # mode
1566     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1567     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1568     self.all_node_info = self.cfg.GetAllNodesInfo()
1569     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1570     self.needed_locks = {}
1571
1572   def Exec(self, feedback_fn):
1573     """Verify integrity of cluster, performing various test on nodes.
1574
1575     """
1576     self.bad = False
1577     self._feedback_fn = feedback_fn
1578
1579     feedback_fn("* Verifying cluster config")
1580
1581     for msg in self.cfg.VerifyConfig():
1582       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1583
1584     feedback_fn("* Verifying cluster certificate files")
1585
1586     for cert_filename in constants.ALL_CERT_FILES:
1587       (errcode, msg) = _VerifyCertificate(cert_filename)
1588       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1589
1590     feedback_fn("* Verifying hypervisor parameters")
1591
1592     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1593                                                 self.all_inst_info.values()))
1594
1595     feedback_fn("* Verifying all nodes belong to an existing group")
1596
1597     # We do this verification here because, should this bogus circumstance
1598     # occur, it would never be caught by VerifyGroup, which only acts on
1599     # nodes/instances reachable from existing node groups.
1600
1601     dangling_nodes = set(node.name for node in self.all_node_info.values()
1602                          if node.group not in self.all_group_info)
1603
1604     dangling_instances = {}
1605     no_node_instances = []
1606
1607     for inst in self.all_inst_info.values():
1608       if inst.primary_node in dangling_nodes:
1609         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1610       elif inst.primary_node not in self.all_node_info:
1611         no_node_instances.append(inst.name)
1612
1613     pretty_dangling = [
1614         "%s (%s)" %
1615         (node.name,
1616          utils.CommaJoin(dangling_instances.get(node.name,
1617                                                 ["no instances"])))
1618         for node in dangling_nodes]
1619
1620     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1621                   None,
1622                   "the following nodes (and their instances) belong to a non"
1623                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1624
1625     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1626                   None,
1627                   "the following instances have a non-existing primary-node:"
1628                   " %s", utils.CommaJoin(no_node_instances))
1629
1630     return not self.bad
1631
1632
1633 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1634   """Verifies the status of a node group.
1635
1636   """
1637   HPATH = "cluster-verify"
1638   HTYPE = constants.HTYPE_CLUSTER
1639   REQ_BGL = False
1640
1641   _HOOKS_INDENT_RE = re.compile("^", re.M)
1642
1643   class NodeImage(object):
1644     """A class representing the logical and physical status of a node.
1645
1646     @type name: string
1647     @ivar name: the node name to which this object refers
1648     @ivar volumes: a structure as returned from
1649         L{ganeti.backend.GetVolumeList} (runtime)
1650     @ivar instances: a list of running instances (runtime)
1651     @ivar pinst: list of configured primary instances (config)
1652     @ivar sinst: list of configured secondary instances (config)
1653     @ivar sbp: dictionary of {primary-node: list of instances} for all
1654         instances for which this node is secondary (config)
1655     @ivar mfree: free memory, as reported by hypervisor (runtime)
1656     @ivar dfree: free disk, as reported by the node (runtime)
1657     @ivar offline: the offline status (config)
1658     @type rpc_fail: boolean
1659     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1660         not whether the individual keys were correct) (runtime)
1661     @type lvm_fail: boolean
1662     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1663     @type hyp_fail: boolean
1664     @ivar hyp_fail: whether the RPC call didn't return the instance list
1665     @type ghost: boolean
1666     @ivar ghost: whether this is a known node or not (config)
1667     @type os_fail: boolean
1668     @ivar os_fail: whether the RPC call didn't return valid OS data
1669     @type oslist: list
1670     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1671     @type vm_capable: boolean
1672     @ivar vm_capable: whether the node can host instances
1673
1674     """
1675     def __init__(self, offline=False, name=None, vm_capable=True):
1676       self.name = name
1677       self.volumes = {}
1678       self.instances = []
1679       self.pinst = []
1680       self.sinst = []
1681       self.sbp = {}
1682       self.mfree = 0
1683       self.dfree = 0
1684       self.offline = offline
1685       self.vm_capable = vm_capable
1686       self.rpc_fail = False
1687       self.lvm_fail = False
1688       self.hyp_fail = False
1689       self.ghost = False
1690       self.os_fail = False
1691       self.oslist = {}
1692
1693   def ExpandNames(self):
1694     # This raises errors.OpPrereqError on its own:
1695     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1696
1697     # Get instances in node group; this is unsafe and needs verification later
1698     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1699
1700     self.needed_locks = {
1701       locking.LEVEL_INSTANCE: inst_names,
1702       locking.LEVEL_NODEGROUP: [self.group_uuid],
1703       locking.LEVEL_NODE: [],
1704       }
1705
1706     self.share_locks = _ShareAll()
1707
1708   def DeclareLocks(self, level):
1709     if level == locking.LEVEL_NODE:
1710       # Get members of node group; this is unsafe and needs verification later
1711       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1712
1713       all_inst_info = self.cfg.GetAllInstancesInfo()
1714
1715       # In Exec(), we warn about mirrored instances that have primary and
1716       # secondary living in separate node groups. To fully verify that
1717       # volumes for these instances are healthy, we will need to do an
1718       # extra call to their secondaries. We ensure here those nodes will
1719       # be locked.
1720       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1721         # Important: access only the instances whose lock is owned
1722         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1723           nodes.update(all_inst_info[inst].secondary_nodes)
1724
1725       self.needed_locks[locking.LEVEL_NODE] = nodes
1726
1727   def CheckPrereq(self):
1728     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1729     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1730
1731     group_nodes = set(self.group_info.members)
1732     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1733
1734     unlocked_nodes = \
1735         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1736
1737     unlocked_instances = \
1738         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1739
1740     if unlocked_nodes:
1741       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1742                                  utils.CommaJoin(unlocked_nodes))
1743
1744     if unlocked_instances:
1745       raise errors.OpPrereqError("Missing lock for instances: %s" %
1746                                  utils.CommaJoin(unlocked_instances))
1747
1748     self.all_node_info = self.cfg.GetAllNodesInfo()
1749     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1750
1751     self.my_node_names = utils.NiceSort(group_nodes)
1752     self.my_inst_names = utils.NiceSort(group_instances)
1753
1754     self.my_node_info = dict((name, self.all_node_info[name])
1755                              for name in self.my_node_names)
1756
1757     self.my_inst_info = dict((name, self.all_inst_info[name])
1758                              for name in self.my_inst_names)
1759
1760     # We detect here the nodes that will need the extra RPC calls for verifying
1761     # split LV volumes; they should be locked.
1762     extra_lv_nodes = set()
1763
1764     for inst in self.my_inst_info.values():
1765       if inst.disk_template in constants.DTS_INT_MIRROR:
1766         group = self.my_node_info[inst.primary_node].group
1767         for nname in inst.secondary_nodes:
1768           if self.all_node_info[nname].group != group:
1769             extra_lv_nodes.add(nname)
1770
1771     unlocked_lv_nodes = \
1772         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1773
1774     if unlocked_lv_nodes:
1775       raise errors.OpPrereqError("these nodes could be locked: %s" %
1776                                  utils.CommaJoin(unlocked_lv_nodes))
1777     self.extra_lv_nodes = list(extra_lv_nodes)
1778
1779   def _VerifyNode(self, ninfo, nresult):
1780     """Perform some basic validation on data returned from a node.
1781
1782       - check the result data structure is well formed and has all the
1783         mandatory fields
1784       - check ganeti version
1785
1786     @type ninfo: L{objects.Node}
1787     @param ninfo: the node to check
1788     @param nresult: the results from the node
1789     @rtype: boolean
1790     @return: whether overall this call was successful (and we can expect
1791          reasonable values in the respose)
1792
1793     """
1794     node = ninfo.name
1795     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1796
1797     # main result, nresult should be a non-empty dict
1798     test = not nresult or not isinstance(nresult, dict)
1799     _ErrorIf(test, constants.CV_ENODERPC, node,
1800                   "unable to verify node: no data returned")
1801     if test:
1802       return False
1803
1804     # compares ganeti version
1805     local_version = constants.PROTOCOL_VERSION
1806     remote_version = nresult.get("version", None)
1807     test = not (remote_version and
1808                 isinstance(remote_version, (list, tuple)) and
1809                 len(remote_version) == 2)
1810     _ErrorIf(test, constants.CV_ENODERPC, node,
1811              "connection to node returned invalid data")
1812     if test:
1813       return False
1814
1815     test = local_version != remote_version[0]
1816     _ErrorIf(test, constants.CV_ENODEVERSION, node,
1817              "incompatible protocol versions: master %s,"
1818              " node %s", local_version, remote_version[0])
1819     if test:
1820       return False
1821
1822     # node seems compatible, we can actually try to look into its results
1823
1824     # full package version
1825     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1826                   constants.CV_ENODEVERSION, node,
1827                   "software version mismatch: master %s, node %s",
1828                   constants.RELEASE_VERSION, remote_version[1],
1829                   code=self.ETYPE_WARNING)
1830
1831     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1832     if ninfo.vm_capable and isinstance(hyp_result, dict):
1833       for hv_name, hv_result in hyp_result.iteritems():
1834         test = hv_result is not None
1835         _ErrorIf(test, constants.CV_ENODEHV, node,
1836                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1837
1838     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1839     if ninfo.vm_capable and isinstance(hvp_result, list):
1840       for item, hv_name, hv_result in hvp_result:
1841         _ErrorIf(True, constants.CV_ENODEHV, node,
1842                  "hypervisor %s parameter verify failure (source %s): %s",
1843                  hv_name, item, hv_result)
1844
1845     test = nresult.get(constants.NV_NODESETUP,
1846                        ["Missing NODESETUP results"])
1847     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1848              "; ".join(test))
1849
1850     return True
1851
1852   def _VerifyNodeTime(self, ninfo, nresult,
1853                       nvinfo_starttime, nvinfo_endtime):
1854     """Check the node time.
1855
1856     @type ninfo: L{objects.Node}
1857     @param ninfo: the node to check
1858     @param nresult: the remote results for the node
1859     @param nvinfo_starttime: the start time of the RPC call
1860     @param nvinfo_endtime: the end time of the RPC call
1861
1862     """
1863     node = ninfo.name
1864     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1865
1866     ntime = nresult.get(constants.NV_TIME, None)
1867     try:
1868       ntime_merged = utils.MergeTime(ntime)
1869     except (ValueError, TypeError):
1870       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1871       return
1872
1873     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1874       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1875     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1876       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1877     else:
1878       ntime_diff = None
1879
1880     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1881              "Node time diverges by at least %s from master node time",
1882              ntime_diff)
1883
1884   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1885     """Check the node LVM results.
1886
1887     @type ninfo: L{objects.Node}
1888     @param ninfo: the node to check
1889     @param nresult: the remote results for the node
1890     @param vg_name: the configured VG name
1891
1892     """
1893     if vg_name is None:
1894       return
1895
1896     node = ninfo.name
1897     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1898
1899     # checks vg existence and size > 20G
1900     vglist = nresult.get(constants.NV_VGLIST, None)
1901     test = not vglist
1902     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1903     if not test:
1904       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1905                                             constants.MIN_VG_SIZE)
1906       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1907
1908     # check pv names
1909     pvlist = nresult.get(constants.NV_PVLIST, None)
1910     test = pvlist is None
1911     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1912     if not test:
1913       # check that ':' is not present in PV names, since it's a
1914       # special character for lvcreate (denotes the range of PEs to
1915       # use on the PV)
1916       for _, pvname, owner_vg in pvlist:
1917         test = ":" in pvname
1918         _ErrorIf(test, constants.CV_ENODELVM, node,
1919                  "Invalid character ':' in PV '%s' of VG '%s'",
1920                  pvname, owner_vg)
1921
1922   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1923     """Check the node bridges.
1924
1925     @type ninfo: L{objects.Node}
1926     @param ninfo: the node to check
1927     @param nresult: the remote results for the node
1928     @param bridges: the expected list of bridges
1929
1930     """
1931     if not bridges:
1932       return
1933
1934     node = ninfo.name
1935     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936
1937     missing = nresult.get(constants.NV_BRIDGES, None)
1938     test = not isinstance(missing, list)
1939     _ErrorIf(test, constants.CV_ENODENET, node,
1940              "did not return valid bridge information")
1941     if not test:
1942       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1943                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1944
1945   def _VerifyNodeUserScripts(self, ninfo, nresult):
1946     """Check the results of user scripts presence and executability on the node
1947
1948     @type ninfo: L{objects.Node}
1949     @param ninfo: the node to check
1950     @param nresult: the remote results for the node
1951
1952     """
1953     node = ninfo.name
1954
1955     test = not constants.NV_USERSCRIPTS in nresult
1956     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1957                   "did not return user scripts information")
1958
1959     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1960     if not test:
1961       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1962                     "user scripts not present or not executable: %s" %
1963                     utils.CommaJoin(sorted(broken_scripts)))
1964
1965   def _VerifyNodeNetwork(self, ninfo, nresult):
1966     """Check the node network connectivity results.
1967
1968     @type ninfo: L{objects.Node}
1969     @param ninfo: the node to check
1970     @param nresult: the remote results for the node
1971
1972     """
1973     node = ninfo.name
1974     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1975
1976     test = constants.NV_NODELIST not in nresult
1977     _ErrorIf(test, constants.CV_ENODESSH, node,
1978              "node hasn't returned node ssh connectivity data")
1979     if not test:
1980       if nresult[constants.NV_NODELIST]:
1981         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1982           _ErrorIf(True, constants.CV_ENODESSH, node,
1983                    "ssh communication with node '%s': %s", a_node, a_msg)
1984
1985     test = constants.NV_NODENETTEST not in nresult
1986     _ErrorIf(test, constants.CV_ENODENET, node,
1987              "node hasn't returned node tcp connectivity data")
1988     if not test:
1989       if nresult[constants.NV_NODENETTEST]:
1990         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1991         for anode in nlist:
1992           _ErrorIf(True, constants.CV_ENODENET, node,
1993                    "tcp communication with node '%s': %s",
1994                    anode, nresult[constants.NV_NODENETTEST][anode])
1995
1996     test = constants.NV_MASTERIP not in nresult
1997     _ErrorIf(test, constants.CV_ENODENET, node,
1998              "node hasn't returned node master IP reachability data")
1999     if not test:
2000       if not nresult[constants.NV_MASTERIP]:
2001         if node == self.master_node:
2002           msg = "the master node cannot reach the master IP (not configured?)"
2003         else:
2004           msg = "cannot reach the master IP"
2005         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2006
2007   def _VerifyInstance(self, instance, instanceconfig, node_image,
2008                       diskstatus):
2009     """Verify an instance.
2010
2011     This function checks to see if the required block devices are
2012     available on the instance's node.
2013
2014     """
2015     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2016     node_current = instanceconfig.primary_node
2017
2018     node_vol_should = {}
2019     instanceconfig.MapLVsByNode(node_vol_should)
2020
2021     for node in node_vol_should:
2022       n_img = node_image[node]
2023       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2024         # ignore missing volumes on offline or broken nodes
2025         continue
2026       for volume in node_vol_should[node]:
2027         test = volume not in n_img.volumes
2028         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2029                  "volume %s missing on node %s", volume, node)
2030
2031     if instanceconfig.admin_up:
2032       pri_img = node_image[node_current]
2033       test = instance not in pri_img.instances and not pri_img.offline
2034       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2035                "instance not running on its primary node %s",
2036                node_current)
2037
2038     diskdata = [(nname, success, status, idx)
2039                 for (nname, disks) in diskstatus.items()
2040                 for idx, (success, status) in enumerate(disks)]
2041
2042     for nname, success, bdev_status, idx in diskdata:
2043       # the 'ghost node' construction in Exec() ensures that we have a
2044       # node here
2045       snode = node_image[nname]
2046       bad_snode = snode.ghost or snode.offline
2047       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2048                constants.CV_EINSTANCEFAULTYDISK, instance,
2049                "couldn't retrieve status for disk/%s on %s: %s",
2050                idx, nname, bdev_status)
2051       _ErrorIf((instanceconfig.admin_up and success and
2052                 bdev_status.ldisk_status == constants.LDS_FAULTY),
2053                constants.CV_EINSTANCEFAULTYDISK, instance,
2054                "disk/%s on %s is faulty", idx, nname)
2055
2056   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2057     """Verify if there are any unknown volumes in the cluster.
2058
2059     The .os, .swap and backup volumes are ignored. All other volumes are
2060     reported as unknown.
2061
2062     @type reserved: L{ganeti.utils.FieldSet}
2063     @param reserved: a FieldSet of reserved volume names
2064
2065     """
2066     for node, n_img in node_image.items():
2067       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2068         # skip non-healthy nodes
2069         continue
2070       for volume in n_img.volumes:
2071         test = ((node not in node_vol_should or
2072                 volume not in node_vol_should[node]) and
2073                 not reserved.Matches(volume))
2074         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2075                       "volume %s is unknown", volume)
2076
2077   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2078     """Verify N+1 Memory Resilience.
2079
2080     Check that if one single node dies we can still start all the
2081     instances it was primary for.
2082
2083     """
2084     cluster_info = self.cfg.GetClusterInfo()
2085     for node, n_img in node_image.items():
2086       # This code checks that every node which is now listed as
2087       # secondary has enough memory to host all instances it is
2088       # supposed to should a single other node in the cluster fail.
2089       # FIXME: not ready for failover to an arbitrary node
2090       # FIXME: does not support file-backed instances
2091       # WARNING: we currently take into account down instances as well
2092       # as up ones, considering that even if they're down someone
2093       # might want to start them even in the event of a node failure.
2094       if n_img.offline:
2095         # we're skipping offline nodes from the N+1 warning, since
2096         # most likely we don't have good memory infromation from them;
2097         # we already list instances living on such nodes, and that's
2098         # enough warning
2099         continue
2100       for prinode, instances in n_img.sbp.items():
2101         needed_mem = 0
2102         for instance in instances:
2103           bep = cluster_info.FillBE(instance_cfg[instance])
2104           if bep[constants.BE_AUTO_BALANCE]:
2105             needed_mem += bep[constants.BE_MEMORY]
2106         test = n_img.mfree < needed_mem
2107         self._ErrorIf(test, constants.CV_ENODEN1, node,
2108                       "not enough memory to accomodate instance failovers"
2109                       " should node %s fail (%dMiB needed, %dMiB available)",
2110                       prinode, needed_mem, n_img.mfree)
2111
2112   @classmethod
2113   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2114                    (files_all, files_opt, files_mc, files_vm)):
2115     """Verifies file checksums collected from all nodes.
2116
2117     @param errorif: Callback for reporting errors
2118     @param nodeinfo: List of L{objects.Node} objects
2119     @param master_node: Name of master node
2120     @param all_nvinfo: RPC results
2121
2122     """
2123     # Define functions determining which nodes to consider for a file
2124     files2nodefn = [
2125       (files_all, None),
2126       (files_mc, lambda node: (node.master_candidate or
2127                                node.name == master_node)),
2128       (files_vm, lambda node: node.vm_capable),
2129       ]
2130
2131     # Build mapping from filename to list of nodes which should have the file
2132     nodefiles = {}
2133     for (files, fn) in files2nodefn:
2134       if fn is None:
2135         filenodes = nodeinfo
2136       else:
2137         filenodes = filter(fn, nodeinfo)
2138       nodefiles.update((filename,
2139                         frozenset(map(operator.attrgetter("name"), filenodes)))
2140                        for filename in files)
2141
2142     assert set(nodefiles) == (files_all | files_mc | files_vm)
2143
2144     fileinfo = dict((filename, {}) for filename in nodefiles)
2145     ignore_nodes = set()
2146
2147     for node in nodeinfo:
2148       if node.offline:
2149         ignore_nodes.add(node.name)
2150         continue
2151
2152       nresult = all_nvinfo[node.name]
2153
2154       if nresult.fail_msg or not nresult.payload:
2155         node_files = None
2156       else:
2157         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2158
2159       test = not (node_files and isinstance(node_files, dict))
2160       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2161               "Node did not return file checksum data")
2162       if test:
2163         ignore_nodes.add(node.name)
2164         continue
2165
2166       # Build per-checksum mapping from filename to nodes having it
2167       for (filename, checksum) in node_files.items():
2168         assert filename in nodefiles
2169         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2170
2171     for (filename, checksums) in fileinfo.items():
2172       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2173
2174       # Nodes having the file
2175       with_file = frozenset(node_name
2176                             for nodes in fileinfo[filename].values()
2177                             for node_name in nodes) - ignore_nodes
2178
2179       expected_nodes = nodefiles[filename] - ignore_nodes
2180
2181       # Nodes missing file
2182       missing_file = expected_nodes - with_file
2183
2184       if filename in files_opt:
2185         # All or no nodes
2186         errorif(missing_file and missing_file != expected_nodes,
2187                 constants.CV_ECLUSTERFILECHECK, None,
2188                 "File %s is optional, but it must exist on all or no"
2189                 " nodes (not found on %s)",
2190                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2191       else:
2192         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2193                 "File %s is missing from node(s) %s", filename,
2194                 utils.CommaJoin(utils.NiceSort(missing_file)))
2195
2196         # Warn if a node has a file it shouldn't
2197         unexpected = with_file - expected_nodes
2198         errorif(unexpected,
2199                 constants.CV_ECLUSTERFILECHECK, None,
2200                 "File %s should not exist on node(s) %s",
2201                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2202
2203       # See if there are multiple versions of the file
2204       test = len(checksums) > 1
2205       if test:
2206         variants = ["variant %s on %s" %
2207                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2208                     for (idx, (checksum, nodes)) in
2209                       enumerate(sorted(checksums.items()))]
2210       else:
2211         variants = []
2212
2213       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2214               "File %s found with %s different checksums (%s)",
2215               filename, len(checksums), "; ".join(variants))
2216
2217   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2218                       drbd_map):
2219     """Verifies and the node DRBD status.
2220
2221     @type ninfo: L{objects.Node}
2222     @param ninfo: the node to check
2223     @param nresult: the remote results for the node
2224     @param instanceinfo: the dict of instances
2225     @param drbd_helper: the configured DRBD usermode helper
2226     @param drbd_map: the DRBD map as returned by
2227         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2228
2229     """
2230     node = ninfo.name
2231     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2232
2233     if drbd_helper:
2234       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2235       test = (helper_result == None)
2236       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2237                "no drbd usermode helper returned")
2238       if helper_result:
2239         status, payload = helper_result
2240         test = not status
2241         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2242                  "drbd usermode helper check unsuccessful: %s", payload)
2243         test = status and (payload != drbd_helper)
2244         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2245                  "wrong drbd usermode helper: %s", payload)
2246
2247     # compute the DRBD minors
2248     node_drbd = {}
2249     for minor, instance in drbd_map[node].items():
2250       test = instance not in instanceinfo
2251       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2252                "ghost instance '%s' in temporary DRBD map", instance)
2253         # ghost instance should not be running, but otherwise we
2254         # don't give double warnings (both ghost instance and
2255         # unallocated minor in use)
2256       if test:
2257         node_drbd[minor] = (instance, False)
2258       else:
2259         instance = instanceinfo[instance]
2260         node_drbd[minor] = (instance.name, instance.admin_up)
2261
2262     # and now check them
2263     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2264     test = not isinstance(used_minors, (tuple, list))
2265     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2266              "cannot parse drbd status file: %s", str(used_minors))
2267     if test:
2268       # we cannot check drbd status
2269       return
2270
2271     for minor, (iname, must_exist) in node_drbd.items():
2272       test = minor not in used_minors and must_exist
2273       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2274                "drbd minor %d of instance %s is not active", minor, iname)
2275     for minor in used_minors:
2276       test = minor not in node_drbd
2277       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2278                "unallocated drbd minor %d is in use", minor)
2279
2280   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2281     """Builds the node OS structures.
2282
2283     @type ninfo: L{objects.Node}
2284     @param ninfo: the node to check
2285     @param nresult: the remote results for the node
2286     @param nimg: the node image object
2287
2288     """
2289     node = ninfo.name
2290     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2291
2292     remote_os = nresult.get(constants.NV_OSLIST, None)
2293     test = (not isinstance(remote_os, list) or
2294             not compat.all(isinstance(v, list) and len(v) == 7
2295                            for v in remote_os))
2296
2297     _ErrorIf(test, constants.CV_ENODEOS, node,
2298              "node hasn't returned valid OS data")
2299
2300     nimg.os_fail = test
2301
2302     if test:
2303       return
2304
2305     os_dict = {}
2306
2307     for (name, os_path, status, diagnose,
2308          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2309
2310       if name not in os_dict:
2311         os_dict[name] = []
2312
2313       # parameters is a list of lists instead of list of tuples due to
2314       # JSON lacking a real tuple type, fix it:
2315       parameters = [tuple(v) for v in parameters]
2316       os_dict[name].append((os_path, status, diagnose,
2317                             set(variants), set(parameters), set(api_ver)))
2318
2319     nimg.oslist = os_dict
2320
2321   def _VerifyNodeOS(self, ninfo, nimg, base):
2322     """Verifies the node OS list.
2323
2324     @type ninfo: L{objects.Node}
2325     @param ninfo: the node to check
2326     @param nimg: the node image object
2327     @param base: the 'template' node we match against (e.g. from the master)
2328
2329     """
2330     node = ninfo.name
2331     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332
2333     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2334
2335     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2336     for os_name, os_data in nimg.oslist.items():
2337       assert os_data, "Empty OS status for OS %s?!" % os_name
2338       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2339       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2340                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2341       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2342                "OS '%s' has multiple entries (first one shadows the rest): %s",
2343                os_name, utils.CommaJoin([v[0] for v in os_data]))
2344       # comparisons with the 'base' image
2345       test = os_name not in base.oslist
2346       _ErrorIf(test, constants.CV_ENODEOS, node,
2347                "Extra OS %s not present on reference node (%s)",
2348                os_name, base.name)
2349       if test:
2350         continue
2351       assert base.oslist[os_name], "Base node has empty OS status?"
2352       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2353       if not b_status:
2354         # base OS is invalid, skipping
2355         continue
2356       for kind, a, b in [("API version", f_api, b_api),
2357                          ("variants list", f_var, b_var),
2358                          ("parameters", beautify_params(f_param),
2359                           beautify_params(b_param))]:
2360         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2361                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2362                  kind, os_name, base.name,
2363                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2364
2365     # check any missing OSes
2366     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2367     _ErrorIf(missing, constants.CV_ENODEOS, node,
2368              "OSes present on reference node %s but missing on this node: %s",
2369              base.name, utils.CommaJoin(missing))
2370
2371   def _VerifyOob(self, ninfo, nresult):
2372     """Verifies out of band functionality of a node.
2373
2374     @type ninfo: L{objects.Node}
2375     @param ninfo: the node to check
2376     @param nresult: the remote results for the node
2377
2378     """
2379     node = ninfo.name
2380     # We just have to verify the paths on master and/or master candidates
2381     # as the oob helper is invoked on the master
2382     if ((ninfo.master_candidate or ninfo.master_capable) and
2383         constants.NV_OOB_PATHS in nresult):
2384       for path_result in nresult[constants.NV_OOB_PATHS]:
2385         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2386
2387   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2388     """Verifies and updates the node volume data.
2389
2390     This function will update a L{NodeImage}'s internal structures
2391     with data from the remote call.
2392
2393     @type ninfo: L{objects.Node}
2394     @param ninfo: the node to check
2395     @param nresult: the remote results for the node
2396     @param nimg: the node image object
2397     @param vg_name: the configured VG name
2398
2399     """
2400     node = ninfo.name
2401     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2402
2403     nimg.lvm_fail = True
2404     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2405     if vg_name is None:
2406       pass
2407     elif isinstance(lvdata, basestring):
2408       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2409                utils.SafeEncode(lvdata))
2410     elif not isinstance(lvdata, dict):
2411       _ErrorIf(True, constants.CV_ENODELVM, node,
2412                "rpc call to node failed (lvlist)")
2413     else:
2414       nimg.volumes = lvdata
2415       nimg.lvm_fail = False
2416
2417   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2418     """Verifies and updates the node instance list.
2419
2420     If the listing was successful, then updates this node's instance
2421     list. Otherwise, it marks the RPC call as failed for the instance
2422     list key.
2423
2424     @type ninfo: L{objects.Node}
2425     @param ninfo: the node to check
2426     @param nresult: the remote results for the node
2427     @param nimg: the node image object
2428
2429     """
2430     idata = nresult.get(constants.NV_INSTANCELIST, None)
2431     test = not isinstance(idata, list)
2432     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2433                   "rpc call to node failed (instancelist): %s",
2434                   utils.SafeEncode(str(idata)))
2435     if test:
2436       nimg.hyp_fail = True
2437     else:
2438       nimg.instances = idata
2439
2440   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2441     """Verifies and computes a node information map
2442
2443     @type ninfo: L{objects.Node}
2444     @param ninfo: the node to check
2445     @param nresult: the remote results for the node
2446     @param nimg: the node image object
2447     @param vg_name: the configured VG name
2448
2449     """
2450     node = ninfo.name
2451     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452
2453     # try to read free memory (from the hypervisor)
2454     hv_info = nresult.get(constants.NV_HVINFO, None)
2455     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2456     _ErrorIf(test, constants.CV_ENODEHV, node,
2457              "rpc call to node failed (hvinfo)")
2458     if not test:
2459       try:
2460         nimg.mfree = int(hv_info["memory_free"])
2461       except (ValueError, TypeError):
2462         _ErrorIf(True, constants.CV_ENODERPC, node,
2463                  "node returned invalid nodeinfo, check hypervisor")
2464
2465     # FIXME: devise a free space model for file based instances as well
2466     if vg_name is not None:
2467       test = (constants.NV_VGLIST not in nresult or
2468               vg_name not in nresult[constants.NV_VGLIST])
2469       _ErrorIf(test, constants.CV_ENODELVM, node,
2470                "node didn't return data for the volume group '%s'"
2471                " - it is either missing or broken", vg_name)
2472       if not test:
2473         try:
2474           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2475         except (ValueError, TypeError):
2476           _ErrorIf(True, constants.CV_ENODERPC, node,
2477                    "node returned invalid LVM info, check LVM status")
2478
2479   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2480     """Gets per-disk status information for all instances.
2481
2482     @type nodelist: list of strings
2483     @param nodelist: Node names
2484     @type node_image: dict of (name, L{objects.Node})
2485     @param node_image: Node objects
2486     @type instanceinfo: dict of (name, L{objects.Instance})
2487     @param instanceinfo: Instance objects
2488     @rtype: {instance: {node: [(succes, payload)]}}
2489     @return: a dictionary of per-instance dictionaries with nodes as
2490         keys and disk information as values; the disk information is a
2491         list of tuples (success, payload)
2492
2493     """
2494     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495
2496     node_disks = {}
2497     node_disks_devonly = {}
2498     diskless_instances = set()
2499     diskless = constants.DT_DISKLESS
2500
2501     for nname in nodelist:
2502       node_instances = list(itertools.chain(node_image[nname].pinst,
2503                                             node_image[nname].sinst))
2504       diskless_instances.update(inst for inst in node_instances
2505                                 if instanceinfo[inst].disk_template == diskless)
2506       disks = [(inst, disk)
2507                for inst in node_instances
2508                for disk in instanceinfo[inst].disks]
2509
2510       if not disks:
2511         # No need to collect data
2512         continue
2513
2514       node_disks[nname] = disks
2515
2516       # Creating copies as SetDiskID below will modify the objects and that can
2517       # lead to incorrect data returned from nodes
2518       devonly = [dev.Copy() for (_, dev) in disks]
2519
2520       for dev in devonly:
2521         self.cfg.SetDiskID(dev, nname)
2522
2523       node_disks_devonly[nname] = devonly
2524
2525     assert len(node_disks) == len(node_disks_devonly)
2526
2527     # Collect data from all nodes with disks
2528     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2529                                                           node_disks_devonly)
2530
2531     assert len(result) == len(node_disks)
2532
2533     instdisk = {}
2534
2535     for (nname, nres) in result.items():
2536       disks = node_disks[nname]
2537
2538       if nres.offline:
2539         # No data from this node
2540         data = len(disks) * [(False, "node offline")]
2541       else:
2542         msg = nres.fail_msg
2543         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2544                  "while getting disk information: %s", msg)
2545         if msg:
2546           # No data from this node
2547           data = len(disks) * [(False, msg)]
2548         else:
2549           data = []
2550           for idx, i in enumerate(nres.payload):
2551             if isinstance(i, (tuple, list)) and len(i) == 2:
2552               data.append(i)
2553             else:
2554               logging.warning("Invalid result from node %s, entry %d: %s",
2555                               nname, idx, i)
2556               data.append((False, "Invalid result from the remote node"))
2557
2558       for ((inst, _), status) in zip(disks, data):
2559         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2560
2561     # Add empty entries for diskless instances.
2562     for inst in diskless_instances:
2563       assert inst not in instdisk
2564       instdisk[inst] = {}
2565
2566     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2567                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2568                       compat.all(isinstance(s, (tuple, list)) and
2569                                  len(s) == 2 for s in statuses)
2570                       for inst, nnames in instdisk.items()
2571                       for nname, statuses in nnames.items())
2572     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2573
2574     return instdisk
2575
2576   @staticmethod
2577   def _SshNodeSelector(group_uuid, all_nodes):
2578     """Create endless iterators for all potential SSH check hosts.
2579
2580     """
2581     nodes = [node for node in all_nodes
2582              if (node.group != group_uuid and
2583                  not node.offline)]
2584     keyfunc = operator.attrgetter("group")
2585
2586     return map(itertools.cycle,
2587                [sorted(map(operator.attrgetter("name"), names))
2588                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2589                                                   keyfunc)])
2590
2591   @classmethod
2592   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2593     """Choose which nodes should talk to which other nodes.
2594
2595     We will make nodes contact all nodes in their group, and one node from
2596     every other group.
2597
2598     @warning: This algorithm has a known issue if one node group is much
2599       smaller than others (e.g. just one node). In such a case all other
2600       nodes will talk to the single node.
2601
2602     """
2603     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2604     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2605
2606     return (online_nodes,
2607             dict((name, sorted([i.next() for i in sel]))
2608                  for name in online_nodes))
2609
2610   def BuildHooksEnv(self):
2611     """Build hooks env.
2612
2613     Cluster-Verify hooks just ran in the post phase and their failure makes
2614     the output be logged in the verify output and the verification to fail.
2615
2616     """
2617     env = {
2618       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2619       }
2620
2621     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2622                for node in self.my_node_info.values())
2623
2624     return env
2625
2626   def BuildHooksNodes(self):
2627     """Build hooks nodes.
2628
2629     """
2630     return ([], self.my_node_names)
2631
2632   def Exec(self, feedback_fn):
2633     """Verify integrity of the node group, performing various test on nodes.
2634
2635     """
2636     # This method has too many local variables. pylint: disable=R0914
2637     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2638
2639     if not self.my_node_names:
2640       # empty node group
2641       feedback_fn("* Empty node group, skipping verification")
2642       return True
2643
2644     self.bad = False
2645     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646     verbose = self.op.verbose
2647     self._feedback_fn = feedback_fn
2648
2649     vg_name = self.cfg.GetVGName()
2650     drbd_helper = self.cfg.GetDRBDHelper()
2651     cluster = self.cfg.GetClusterInfo()
2652     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2653     hypervisors = cluster.enabled_hypervisors
2654     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2655
2656     i_non_redundant = [] # Non redundant instances
2657     i_non_a_balanced = [] # Non auto-balanced instances
2658     n_offline = 0 # Count of offline nodes
2659     n_drained = 0 # Count of nodes being drained
2660     node_vol_should = {}
2661
2662     # FIXME: verify OS list
2663
2664     # File verification
2665     filemap = _ComputeAncillaryFiles(cluster, False)
2666
2667     # do local checksums
2668     master_node = self.master_node = self.cfg.GetMasterNode()
2669     master_ip = self.cfg.GetMasterIP()
2670
2671     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2672
2673     user_scripts = []
2674     if self.cfg.GetUseExternalMipScript():
2675       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2676
2677     node_verify_param = {
2678       constants.NV_FILELIST:
2679         utils.UniqueSequence(filename
2680                              for files in filemap
2681                              for filename in files),
2682       constants.NV_NODELIST:
2683         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2684                                   self.all_node_info.values()),
2685       constants.NV_HYPERVISOR: hypervisors,
2686       constants.NV_HVPARAMS:
2687         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2688       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2689                                  for node in node_data_list
2690                                  if not node.offline],
2691       constants.NV_INSTANCELIST: hypervisors,
2692       constants.NV_VERSION: None,
2693       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2694       constants.NV_NODESETUP: None,
2695       constants.NV_TIME: None,
2696       constants.NV_MASTERIP: (master_node, master_ip),
2697       constants.NV_OSLIST: None,
2698       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2699       constants.NV_USERSCRIPTS: user_scripts,
2700       }
2701
2702     if vg_name is not None:
2703       node_verify_param[constants.NV_VGLIST] = None
2704       node_verify_param[constants.NV_LVLIST] = vg_name
2705       node_verify_param[constants.NV_PVLIST] = [vg_name]
2706       node_verify_param[constants.NV_DRBDLIST] = None
2707
2708     if drbd_helper:
2709       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2710
2711     # bridge checks
2712     # FIXME: this needs to be changed per node-group, not cluster-wide
2713     bridges = set()
2714     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2715     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2716       bridges.add(default_nicpp[constants.NIC_LINK])
2717     for instance in self.my_inst_info.values():
2718       for nic in instance.nics:
2719         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2720         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2721           bridges.add(full_nic[constants.NIC_LINK])
2722
2723     if bridges:
2724       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2725
2726     # Build our expected cluster state
2727     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2728                                                  name=node.name,
2729                                                  vm_capable=node.vm_capable))
2730                       for node in node_data_list)
2731
2732     # Gather OOB paths
2733     oob_paths = []
2734     for node in self.all_node_info.values():
2735       path = _SupportsOob(self.cfg, node)
2736       if path and path not in oob_paths:
2737         oob_paths.append(path)
2738
2739     if oob_paths:
2740       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2741
2742     for instance in self.my_inst_names:
2743       inst_config = self.my_inst_info[instance]
2744
2745       for nname in inst_config.all_nodes:
2746         if nname not in node_image:
2747           gnode = self.NodeImage(name=nname)
2748           gnode.ghost = (nname not in self.all_node_info)
2749           node_image[nname] = gnode
2750
2751       inst_config.MapLVsByNode(node_vol_should)
2752
2753       pnode = inst_config.primary_node
2754       node_image[pnode].pinst.append(instance)
2755
2756       for snode in inst_config.secondary_nodes:
2757         nimg = node_image[snode]
2758         nimg.sinst.append(instance)
2759         if pnode not in nimg.sbp:
2760           nimg.sbp[pnode] = []
2761         nimg.sbp[pnode].append(instance)
2762
2763     # At this point, we have the in-memory data structures complete,
2764     # except for the runtime information, which we'll gather next
2765
2766     # Due to the way our RPC system works, exact response times cannot be
2767     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2768     # time before and after executing the request, we can at least have a time
2769     # window.
2770     nvinfo_starttime = time.time()
2771     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2772                                            node_verify_param,
2773                                            self.cfg.GetClusterName())
2774     nvinfo_endtime = time.time()
2775
2776     if self.extra_lv_nodes and vg_name is not None:
2777       extra_lv_nvinfo = \
2778           self.rpc.call_node_verify(self.extra_lv_nodes,
2779                                     {constants.NV_LVLIST: vg_name},
2780                                     self.cfg.GetClusterName())
2781     else:
2782       extra_lv_nvinfo = {}
2783
2784     all_drbd_map = self.cfg.ComputeDRBDMap()
2785
2786     feedback_fn("* Gathering disk information (%s nodes)" %
2787                 len(self.my_node_names))
2788     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2789                                      self.my_inst_info)
2790
2791     feedback_fn("* Verifying configuration file consistency")
2792
2793     # If not all nodes are being checked, we need to make sure the master node
2794     # and a non-checked vm_capable node are in the list.
2795     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2796     if absent_nodes:
2797       vf_nvinfo = all_nvinfo.copy()
2798       vf_node_info = list(self.my_node_info.values())
2799       additional_nodes = []
2800       if master_node not in self.my_node_info:
2801         additional_nodes.append(master_node)
2802         vf_node_info.append(self.all_node_info[master_node])
2803       # Add the first vm_capable node we find which is not included
2804       for node in absent_nodes:
2805         nodeinfo = self.all_node_info[node]
2806         if nodeinfo.vm_capable and not nodeinfo.offline:
2807           additional_nodes.append(node)
2808           vf_node_info.append(self.all_node_info[node])
2809           break
2810       key = constants.NV_FILELIST
2811       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2812                                                  {key: node_verify_param[key]},
2813                                                  self.cfg.GetClusterName()))
2814     else:
2815       vf_nvinfo = all_nvinfo
2816       vf_node_info = self.my_node_info.values()
2817
2818     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2819
2820     feedback_fn("* Verifying node status")
2821
2822     refos_img = None
2823
2824     for node_i in node_data_list:
2825       node = node_i.name
2826       nimg = node_image[node]
2827
2828       if node_i.offline:
2829         if verbose:
2830           feedback_fn("* Skipping offline node %s" % (node,))
2831         n_offline += 1
2832         continue
2833
2834       if node == master_node:
2835         ntype = "master"
2836       elif node_i.master_candidate:
2837         ntype = "master candidate"
2838       elif node_i.drained:
2839         ntype = "drained"
2840         n_drained += 1
2841       else:
2842         ntype = "regular"
2843       if verbose:
2844         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2845
2846       msg = all_nvinfo[node].fail_msg
2847       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2848                msg)
2849       if msg:
2850         nimg.rpc_fail = True
2851         continue
2852
2853       nresult = all_nvinfo[node].payload
2854
2855       nimg.call_ok = self._VerifyNode(node_i, nresult)
2856       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2857       self._VerifyNodeNetwork(node_i, nresult)
2858       self._VerifyNodeUserScripts(node_i, nresult)
2859       self._VerifyOob(node_i, nresult)
2860
2861       if nimg.vm_capable:
2862         self._VerifyNodeLVM(node_i, nresult, vg_name)
2863         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2864                              all_drbd_map)
2865
2866         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2867         self._UpdateNodeInstances(node_i, nresult, nimg)
2868         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2869         self._UpdateNodeOS(node_i, nresult, nimg)
2870
2871         if not nimg.os_fail:
2872           if refos_img is None:
2873             refos_img = nimg
2874           self._VerifyNodeOS(node_i, nimg, refos_img)
2875         self._VerifyNodeBridges(node_i, nresult, bridges)
2876
2877         # Check whether all running instancies are primary for the node. (This
2878         # can no longer be done from _VerifyInstance below, since some of the
2879         # wrong instances could be from other node groups.)
2880         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2881
2882         for inst in non_primary_inst:
2883           test = inst in self.all_inst_info
2884           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2885                    "instance should not run on node %s", node_i.name)
2886           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2887                    "node is running unknown instance %s", inst)
2888
2889     for node, result in extra_lv_nvinfo.items():
2890       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2891                               node_image[node], vg_name)
2892
2893     feedback_fn("* Verifying instance status")
2894     for instance in self.my_inst_names:
2895       if verbose:
2896         feedback_fn("* Verifying instance %s" % instance)
2897       inst_config = self.my_inst_info[instance]
2898       self._VerifyInstance(instance, inst_config, node_image,
2899                            instdisk[instance])
2900       inst_nodes_offline = []
2901
2902       pnode = inst_config.primary_node
2903       pnode_img = node_image[pnode]
2904       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2905                constants.CV_ENODERPC, pnode, "instance %s, connection to"
2906                " primary node failed", instance)
2907
2908       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2909                constants.CV_EINSTANCEBADNODE, instance,
2910                "instance is marked as running and lives on offline node %s",
2911                inst_config.primary_node)
2912
2913       # If the instance is non-redundant we cannot survive losing its primary
2914       # node, so we are not N+1 compliant. On the other hand we have no disk
2915       # templates with more than one secondary so that situation is not well
2916       # supported either.
2917       # FIXME: does not support file-backed instances
2918       if not inst_config.secondary_nodes:
2919         i_non_redundant.append(instance)
2920
2921       _ErrorIf(len(inst_config.secondary_nodes) > 1,
2922                constants.CV_EINSTANCELAYOUT,
2923                instance, "instance has multiple secondary nodes: %s",
2924                utils.CommaJoin(inst_config.secondary_nodes),
2925                code=self.ETYPE_WARNING)
2926
2927       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2928         pnode = inst_config.primary_node
2929         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2930         instance_groups = {}
2931
2932         for node in instance_nodes:
2933           instance_groups.setdefault(self.all_node_info[node].group,
2934                                      []).append(node)
2935
2936         pretty_list = [
2937           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2938           # Sort so that we always list the primary node first.
2939           for group, nodes in sorted(instance_groups.items(),
2940                                      key=lambda (_, nodes): pnode in nodes,
2941                                      reverse=True)]
2942
2943         self._ErrorIf(len(instance_groups) > 1,
2944                       constants.CV_EINSTANCESPLITGROUPS,
2945                       instance, "instance has primary and secondary nodes in"
2946                       " different groups: %s", utils.CommaJoin(pretty_list),
2947                       code=self.ETYPE_WARNING)
2948
2949       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2950         i_non_a_balanced.append(instance)
2951
2952       for snode in inst_config.secondary_nodes:
2953         s_img = node_image[snode]
2954         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2955                  snode, "instance %s, connection to secondary node failed",
2956                  instance)
2957
2958         if s_img.offline:
2959           inst_nodes_offline.append(snode)
2960
2961       # warn that the instance lives on offline nodes
2962       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2963                "instance has offline secondary node(s) %s",
2964                utils.CommaJoin(inst_nodes_offline))
2965       # ... or ghost/non-vm_capable nodes
2966       for node in inst_config.all_nodes:
2967         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2968                  instance, "instance lives on ghost node %s", node)
2969         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2970                  instance, "instance lives on non-vm_capable node %s", node)
2971
2972     feedback_fn("* Verifying orphan volumes")
2973     reserved = utils.FieldSet(*cluster.reserved_lvs)
2974
2975     # We will get spurious "unknown volume" warnings if any node of this group
2976     # is secondary for an instance whose primary is in another group. To avoid
2977     # them, we find these instances and add their volumes to node_vol_should.
2978     for inst in self.all_inst_info.values():
2979       for secondary in inst.secondary_nodes:
2980         if (secondary in self.my_node_info
2981             and inst.name not in self.my_inst_info):
2982           inst.MapLVsByNode(node_vol_should)
2983           break
2984
2985     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2986
2987     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2988       feedback_fn("* Verifying N+1 Memory redundancy")
2989       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2990
2991     feedback_fn("* Other Notes")
2992     if i_non_redundant:
2993       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2994                   % len(i_non_redundant))
2995
2996     if i_non_a_balanced:
2997       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2998                   % len(i_non_a_balanced))
2999
3000     if n_offline:
3001       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3002
3003     if n_drained:
3004       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3005
3006     return not self.bad
3007
3008   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3009     """Analyze the post-hooks' result
3010
3011     This method analyses the hook result, handles it, and sends some
3012     nicely-formatted feedback back to the user.
3013
3014     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3015         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3016     @param hooks_results: the results of the multi-node hooks rpc call
3017     @param feedback_fn: function used send feedback back to the caller
3018     @param lu_result: previous Exec result
3019     @return: the new Exec result, based on the previous result
3020         and hook results
3021
3022     """
3023     # We only really run POST phase hooks, only for non-empty groups,
3024     # and are only interested in their results
3025     if not self.my_node_names:
3026       # empty node group
3027       pass
3028     elif phase == constants.HOOKS_PHASE_POST:
3029       # Used to change hooks' output to proper indentation
3030       feedback_fn("* Hooks Results")
3031       assert hooks_results, "invalid result from hooks"
3032
3033       for node_name in hooks_results:
3034         res = hooks_results[node_name]
3035         msg = res.fail_msg
3036         test = msg and not res.offline
3037         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3038                       "Communication failure in hooks execution: %s", msg)
3039         if res.offline or msg:
3040           # No need to investigate payload if node is offline or gave
3041           # an error.
3042           continue
3043         for script, hkr, output in res.payload:
3044           test = hkr == constants.HKR_FAIL
3045           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3046                         "Script %s failed, output:", script)
3047           if test:
3048             output = self._HOOKS_INDENT_RE.sub("      ", output)
3049             feedback_fn("%s" % output)
3050             lu_result = False
3051
3052     return lu_result
3053
3054
3055 class LUClusterVerifyDisks(NoHooksLU):
3056   """Verifies the cluster disks status.
3057
3058   """
3059   REQ_BGL = False
3060
3061   def ExpandNames(self):
3062     self.share_locks = _ShareAll()
3063     self.needed_locks = {
3064       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3065       }
3066
3067   def Exec(self, feedback_fn):
3068     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3069
3070     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3071     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3072                            for group in group_names])
3073
3074
3075 class LUGroupVerifyDisks(NoHooksLU):
3076   """Verifies the status of all disks in a node group.
3077
3078   """
3079   REQ_BGL = False
3080
3081   def ExpandNames(self):
3082     # Raises errors.OpPrereqError on its own if group can't be found
3083     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3084
3085     self.share_locks = _ShareAll()
3086     self.needed_locks = {
3087       locking.LEVEL_INSTANCE: [],
3088       locking.LEVEL_NODEGROUP: [],
3089       locking.LEVEL_NODE: [],
3090       }
3091
3092   def DeclareLocks(self, level):
3093     if level == locking.LEVEL_INSTANCE:
3094       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3095
3096       # Lock instances optimistically, needs verification once node and group
3097       # locks have been acquired
3098       self.needed_locks[locking.LEVEL_INSTANCE] = \
3099         self.cfg.GetNodeGroupInstances(self.group_uuid)
3100
3101     elif level == locking.LEVEL_NODEGROUP:
3102       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3103
3104       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3105         set([self.group_uuid] +
3106             # Lock all groups used by instances optimistically; this requires
3107             # going via the node before it's locked, requiring verification
3108             # later on
3109             [group_uuid
3110              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3111              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3112
3113     elif level == locking.LEVEL_NODE:
3114       # This will only lock the nodes in the group to be verified which contain
3115       # actual instances
3116       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3117       self._LockInstancesNodes()
3118
3119       # Lock all nodes in group to be verified
3120       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3121       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3122       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3123
3124   def CheckPrereq(self):
3125     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3126     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3127     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3128
3129     assert self.group_uuid in owned_groups
3130
3131     # Check if locked instances are still correct
3132     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3133
3134     # Get instance information
3135     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3136
3137     # Check if node groups for locked instances are still correct
3138     for (instance_name, inst) in self.instances.items():
3139       assert owned_nodes.issuperset(inst.all_nodes), \
3140         "Instance %s's nodes changed while we kept the lock" % instance_name
3141
3142       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3143                                              owned_groups)
3144
3145       assert self.group_uuid in inst_groups, \
3146         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3147
3148   def Exec(self, feedback_fn):
3149     """Verify integrity of cluster disks.
3150
3151     @rtype: tuple of three items
3152     @return: a tuple of (dict of node-to-node_error, list of instances
3153         which need activate-disks, dict of instance: (node, volume) for
3154         missing volumes
3155
3156     """
3157     res_nodes = {}
3158     res_instances = set()
3159     res_missing = {}
3160
3161     nv_dict = _MapInstanceDisksToNodes([inst
3162                                         for inst in self.instances.values()
3163                                         if inst.admin_up])
3164
3165     if nv_dict:
3166       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3167                              set(self.cfg.GetVmCapableNodeList()))
3168
3169       node_lvs = self.rpc.call_lv_list(nodes, [])
3170
3171       for (node, node_res) in node_lvs.items():
3172         if node_res.offline:
3173           continue
3174
3175         msg = node_res.fail_msg
3176         if msg:
3177           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3178           res_nodes[node] = msg
3179           continue
3180
3181         for lv_name, (_, _, lv_online) in node_res.payload.items():
3182           inst = nv_dict.pop((node, lv_name), None)
3183           if not (lv_online or inst is None):
3184             res_instances.add(inst)
3185
3186       # any leftover items in nv_dict are missing LVs, let's arrange the data
3187       # better
3188       for key, inst in nv_dict.iteritems():
3189         res_missing.setdefault(inst, []).append(list(key))
3190
3191     return (res_nodes, list(res_instances), res_missing)
3192
3193
3194 class LUClusterRepairDiskSizes(NoHooksLU):
3195   """Verifies the cluster disks sizes.
3196
3197   """
3198   REQ_BGL = False
3199
3200   def ExpandNames(self):
3201     if self.op.instances:
3202       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3203       self.needed_locks = {
3204         locking.LEVEL_NODE: [],
3205         locking.LEVEL_INSTANCE: self.wanted_names,
3206         }
3207       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3208     else:
3209       self.wanted_names = None
3210       self.needed_locks = {
3211         locking.LEVEL_NODE: locking.ALL_SET,
3212         locking.LEVEL_INSTANCE: locking.ALL_SET,
3213         }
3214     self.share_locks = _ShareAll()
3215
3216   def DeclareLocks(self, level):
3217     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3218       self._LockInstancesNodes(primary_only=True)
3219
3220   def CheckPrereq(self):
3221     """Check prerequisites.
3222
3223     This only checks the optional instance list against the existing names.
3224
3225     """
3226     if self.wanted_names is None:
3227       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3228
3229     self.wanted_instances = \
3230         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3231
3232   def _EnsureChildSizes(self, disk):
3233     """Ensure children of the disk have the needed disk size.
3234
3235     This is valid mainly for DRBD8 and fixes an issue where the
3236     children have smaller disk size.
3237
3238     @param disk: an L{ganeti.objects.Disk} object
3239
3240     """
3241     if disk.dev_type == constants.LD_DRBD8:
3242       assert disk.children, "Empty children for DRBD8?"
3243       fchild = disk.children[0]
3244       mismatch = fchild.size < disk.size
3245       if mismatch:
3246         self.LogInfo("Child disk has size %d, parent %d, fixing",
3247                      fchild.size, disk.size)
3248         fchild.size = disk.size
3249
3250       # and we recurse on this child only, not on the metadev
3251       return self._EnsureChildSizes(fchild) or mismatch
3252     else:
3253       return False
3254
3255   def Exec(self, feedback_fn):
3256     """Verify the size of cluster disks.
3257
3258     """
3259     # TODO: check child disks too
3260     # TODO: check differences in size between primary/secondary nodes
3261     per_node_disks = {}
3262     for instance in self.wanted_instances:
3263       pnode = instance.primary_node
3264       if pnode not in per_node_disks:
3265         per_node_disks[pnode] = []
3266       for idx, disk in enumerate(instance.disks):
3267         per_node_disks[pnode].append((instance, idx, disk))
3268
3269     changed = []
3270     for node, dskl in per_node_disks.items():
3271       newl = [v[2].Copy() for v in dskl]
3272       for dsk in newl:
3273         self.cfg.SetDiskID(dsk, node)
3274       result = self.rpc.call_blockdev_getsize(node, newl)
3275       if result.fail_msg:
3276         self.LogWarning("Failure in blockdev_getsize call to node"
3277                         " %s, ignoring", node)
3278         continue
3279       if len(result.payload) != len(dskl):
3280         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3281                         " result.payload=%s", node, len(dskl), result.payload)
3282         self.LogWarning("Invalid result from node %s, ignoring node results",
3283                         node)
3284         continue
3285       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3286         if size is None:
3287           self.LogWarning("Disk %d of instance %s did not return size"
3288                           " information, ignoring", idx, instance.name)
3289           continue
3290         if not isinstance(size, (int, long)):
3291           self.LogWarning("Disk %d of instance %s did not return valid"
3292                           " size information, ignoring", idx, instance.name)
3293           continue
3294         size = size >> 20
3295         if size != disk.size:
3296           self.LogInfo("Disk %d of instance %s has mismatched size,"
3297                        " correcting: recorded %d, actual %d", idx,
3298                        instance.name, disk.size, size)
3299           disk.size = size
3300           self.cfg.Update(instance, feedback_fn)
3301           changed.append((instance.name, idx, size))
3302         if self._EnsureChildSizes(disk):
3303           self.cfg.Update(instance, feedback_fn)
3304           changed.append((instance.name, idx, disk.size))
3305     return changed
3306
3307
3308 class LUClusterRename(LogicalUnit):
3309   """Rename the cluster.
3310
3311   """
3312   HPATH = "cluster-rename"
3313   HTYPE = constants.HTYPE_CLUSTER
3314
3315   def BuildHooksEnv(self):
3316     """Build hooks env.
3317
3318     """
3319     return {
3320       "OP_TARGET": self.cfg.GetClusterName(),
3321       "NEW_NAME": self.op.name,
3322       }
3323
3324   def BuildHooksNodes(self):
3325     """Build hooks nodes.
3326
3327     """
3328     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3329
3330   def CheckPrereq(self):
3331     """Verify that the passed name is a valid one.
3332
3333     """
3334     hostname = netutils.GetHostname(name=self.op.name,
3335                                     family=self.cfg.GetPrimaryIPFamily())
3336
3337     new_name = hostname.name
3338     self.ip = new_ip = hostname.ip
3339     old_name = self.cfg.GetClusterName()
3340     old_ip = self.cfg.GetMasterIP()
3341     if new_name == old_name and new_ip == old_ip:
3342       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3343                                  " cluster has changed",
3344                                  errors.ECODE_INVAL)
3345     if new_ip != old_ip:
3346       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3347         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3348                                    " reachable on the network" %
3349                                    new_ip, errors.ECODE_NOTUNIQUE)
3350
3351     self.op.name = new_name
3352
3353   def Exec(self, feedback_fn):
3354     """Rename the cluster.
3355
3356     """
3357     clustername = self.op.name
3358     new_ip = self.ip
3359
3360     # shutdown the master IP
3361     master_params = self.cfg.GetMasterNetworkParameters()
3362     ems = self.cfg.GetUseExternalMipScript()
3363     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3364                                                      master_params, ems)
3365     result.Raise("Could not disable the master role")
3366
3367     try:
3368       cluster = self.cfg.GetClusterInfo()
3369       cluster.cluster_name = clustername
3370       cluster.master_ip = new_ip
3371       self.cfg.Update(cluster, feedback_fn)
3372
3373       # update the known hosts file
3374       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3375       node_list = self.cfg.GetOnlineNodeList()
3376       try:
3377         node_list.remove(master_params.name)
3378       except ValueError:
3379         pass
3380       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3381     finally:
3382       master_params.ip = new_ip
3383       result = self.rpc.call_node_activate_master_ip(master_params.name,
3384                                                      master_params, ems)
3385       msg = result.fail_msg
3386       if msg:
3387         self.LogWarning("Could not re-enable the master role on"
3388                         " the master, please restart manually: %s", msg)
3389
3390     return clustername
3391
3392
3393 def _ValidateNetmask(cfg, netmask):
3394   """Checks if a netmask is valid.
3395
3396   @type cfg: L{config.ConfigWriter}
3397   @param cfg: The cluster configuration
3398   @type netmask: int
3399   @param netmask: the netmask to be verified
3400   @raise errors.OpPrereqError: if the validation fails
3401
3402   """
3403   ip_family = cfg.GetPrimaryIPFamily()
3404   try:
3405     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3406   except errors.ProgrammerError:
3407     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3408                                ip_family)
3409   if not ipcls.ValidateNetmask(netmask):
3410     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3411                                 (netmask))
3412
3413
3414 class LUClusterSetParams(LogicalUnit):
3415   """Change the parameters of the cluster.
3416
3417   """
3418   HPATH = "cluster-modify"
3419   HTYPE = constants.HTYPE_CLUSTER
3420   REQ_BGL = False
3421
3422   def CheckArguments(self):
3423     """Check parameters
3424
3425     """
3426     if self.op.uid_pool:
3427       uidpool.CheckUidPool(self.op.uid_pool)
3428
3429     if self.op.add_uids:
3430       uidpool.CheckUidPool(self.op.add_uids)
3431
3432     if self.op.remove_uids:
3433       uidpool.CheckUidPool(self.op.remove_uids)
3434
3435     if self.op.master_netmask is not None:
3436       _ValidateNetmask(self.cfg, self.op.master_netmask)
3437
3438   def ExpandNames(self):
3439     # FIXME: in the future maybe other cluster params won't require checking on
3440     # all nodes to be modified.
3441     self.needed_locks = {
3442       locking.LEVEL_NODE: locking.ALL_SET,
3443     }
3444     self.share_locks[locking.LEVEL_NODE] = 1
3445
3446   def BuildHooksEnv(self):
3447     """Build hooks env.
3448
3449     """
3450     return {
3451       "OP_TARGET": self.cfg.GetClusterName(),
3452       "NEW_VG_NAME": self.op.vg_name,
3453       }
3454
3455   def BuildHooksNodes(self):
3456     """Build hooks nodes.
3457
3458     """
3459     mn = self.cfg.GetMasterNode()
3460     return ([mn], [mn])
3461
3462   def CheckPrereq(self):
3463     """Check prerequisites.
3464
3465     This checks whether the given params don't conflict and
3466     if the given volume group is valid.
3467
3468     """
3469     if self.op.vg_name is not None and not self.op.vg_name:
3470       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3471         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3472                                    " instances exist", errors.ECODE_INVAL)
3473
3474     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3475       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3476         raise errors.OpPrereqError("Cannot disable drbd helper while"
3477                                    " drbd-based instances exist",
3478                                    errors.ECODE_INVAL)
3479
3480     node_list = self.owned_locks(locking.LEVEL_NODE)
3481
3482     # if vg_name not None, checks given volume group on all nodes
3483     if self.op.vg_name:
3484       vglist = self.rpc.call_vg_list(node_list)
3485       for node in node_list:
3486         msg = vglist[node].fail_msg
3487         if msg:
3488           # ignoring down node
3489           self.LogWarning("Error while gathering data on node %s"
3490                           " (ignoring node): %s", node, msg)
3491           continue
3492         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3493                                               self.op.vg_name,
3494                                               constants.MIN_VG_SIZE)
3495         if vgstatus:
3496           raise errors.OpPrereqError("Error on node '%s': %s" %
3497                                      (node, vgstatus), errors.ECODE_ENVIRON)
3498
3499     if self.op.drbd_helper:
3500       # checks given drbd helper on all nodes
3501       helpers = self.rpc.call_drbd_helper(node_list)
3502       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3503         if ninfo.offline:
3504           self.LogInfo("Not checking drbd helper on offline node %s", node)
3505           continue
3506         msg = helpers[node].fail_msg
3507         if msg:
3508           raise errors.OpPrereqError("Error checking drbd helper on node"
3509                                      " '%s': %s" % (node, msg),
3510                                      errors.ECODE_ENVIRON)
3511         node_helper = helpers[node].payload
3512         if node_helper != self.op.drbd_helper:
3513           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3514                                      (node, node_helper), errors.ECODE_ENVIRON)
3515
3516     self.cluster = cluster = self.cfg.GetClusterInfo()
3517     # validate params changes
3518     if self.op.beparams:
3519       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3520       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3521
3522     if self.op.ndparams:
3523       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3524       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3525
3526       # TODO: we need a more general way to handle resetting
3527       # cluster-level parameters to default values
3528       if self.new_ndparams["oob_program"] == "":
3529         self.new_ndparams["oob_program"] = \
3530             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3531
3532     if self.op.nicparams:
3533       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3534       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3535       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3536       nic_errors = []
3537
3538       # check all instances for consistency
3539       for instance in self.cfg.GetAllInstancesInfo().values():
3540         for nic_idx, nic in enumerate(instance.nics):
3541           params_copy = copy.deepcopy(nic.nicparams)
3542           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3543
3544           # check parameter syntax
3545           try:
3546             objects.NIC.CheckParameterSyntax(params_filled)
3547           except errors.ConfigurationError, err:
3548             nic_errors.append("Instance %s, nic/%d: %s" %
3549                               (instance.name, nic_idx, err))
3550
3551           # if we're moving instances to routed, check that they have an ip
3552           target_mode = params_filled[constants.NIC_MODE]
3553           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3554             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3555                               " address" % (instance.name, nic_idx))
3556       if nic_errors:
3557         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3558                                    "\n".join(nic_errors))
3559
3560     # hypervisor list/parameters
3561     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3562     if self.op.hvparams:
3563       for hv_name, hv_dict in self.op.hvparams.items():
3564         if hv_name not in self.new_hvparams:
3565           self.new_hvparams[hv_name] = hv_dict
3566         else:
3567           self.new_hvparams[hv_name].update(hv_dict)
3568
3569     # os hypervisor parameters
3570     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3571     if self.op.os_hvp:
3572       for os_name, hvs in self.op.os_hvp.items():
3573         if os_name not in self.new_os_hvp:
3574           self.new_os_hvp[os_name] = hvs
3575         else:
3576           for hv_name, hv_dict in hvs.items():
3577             if hv_name not in self.new_os_hvp[os_name]:
3578               self.new_os_hvp[os_name][hv_name] = hv_dict
3579             else:
3580               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3581
3582     # os parameters
3583     self.new_osp = objects.FillDict(cluster.osparams, {})
3584     if self.op.osparams:
3585       for os_name, osp in self.op.osparams.items():
3586         if os_name not in self.new_osp:
3587           self.new_osp[os_name] = {}
3588
3589         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3590                                                   use_none=True)
3591
3592         if not self.new_osp[os_name]:
3593           # we removed all parameters
3594           del self.new_osp[os_name]
3595         else:
3596           # check the parameter validity (remote check)
3597           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3598                          os_name, self.new_osp[os_name])
3599
3600     # changes to the hypervisor list
3601     if self.op.enabled_hypervisors is not None:
3602       self.hv_list = self.op.enabled_hypervisors
3603       for hv in self.hv_list:
3604         # if the hypervisor doesn't already exist in the cluster
3605         # hvparams, we initialize it to empty, and then (in both
3606         # cases) we make sure to fill the defaults, as we might not
3607         # have a complete defaults list if the hypervisor wasn't
3608         # enabled before
3609         if hv not in new_hvp:
3610           new_hvp[hv] = {}
3611         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3612         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3613     else:
3614       self.hv_list = cluster.enabled_hypervisors
3615
3616     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3617       # either the enabled list has changed, or the parameters have, validate
3618       for hv_name, hv_params in self.new_hvparams.items():
3619         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3620             (self.op.enabled_hypervisors and
3621              hv_name in self.op.enabled_hypervisors)):
3622           # either this is a new hypervisor, or its parameters have changed
3623           hv_class = hypervisor.GetHypervisor(hv_name)
3624           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3625           hv_class.CheckParameterSyntax(hv_params)
3626           _CheckHVParams(self, node_list, hv_name, hv_params)
3627
3628     if self.op.os_hvp:
3629       # no need to check any newly-enabled hypervisors, since the
3630       # defaults have already been checked in the above code-block
3631       for os_name, os_hvp in self.new_os_hvp.items():
3632         for hv_name, hv_params in os_hvp.items():
3633           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3634           # we need to fill in the new os_hvp on top of the actual hv_p
3635           cluster_defaults = self.new_hvparams.get(hv_name, {})
3636           new_osp = objects.FillDict(cluster_defaults, hv_params)
3637           hv_class = hypervisor.GetHypervisor(hv_name)
3638           hv_class.CheckParameterSyntax(new_osp)
3639           _CheckHVParams(self, node_list, hv_name, new_osp)
3640
3641     if self.op.default_iallocator:
3642       alloc_script = utils.FindFile(self.op.default_iallocator,
3643                                     constants.IALLOCATOR_SEARCH_PATH,
3644                                     os.path.isfile)
3645       if alloc_script is None:
3646         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3647                                    " specified" % self.op.default_iallocator,
3648                                    errors.ECODE_INVAL)
3649
3650   def Exec(self, feedback_fn):
3651     """Change the parameters of the cluster.
3652
3653     """
3654     if self.op.vg_name is not None:
3655       new_volume = self.op.vg_name
3656       if not new_volume:
3657         new_volume = None
3658       if new_volume != self.cfg.GetVGName():
3659         self.cfg.SetVGName(new_volume)
3660       else:
3661         feedback_fn("Cluster LVM configuration already in desired"
3662                     " state, not changing")
3663     if self.op.drbd_helper is not None:
3664       new_helper = self.op.drbd_helper
3665       if not new_helper:
3666         new_helper = None
3667       if new_helper != self.cfg.GetDRBDHelper():
3668         self.cfg.SetDRBDHelper(new_helper)
3669       else:
3670         feedback_fn("Cluster DRBD helper already in desired state,"
3671                     " not changing")
3672     if self.op.hvparams:
3673       self.cluster.hvparams = self.new_hvparams
3674     if self.op.os_hvp:
3675       self.cluster.os_hvp = self.new_os_hvp
3676     if self.op.enabled_hypervisors is not None:
3677       self.cluster.hvparams = self.new_hvparams
3678       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3679     if self.op.beparams:
3680       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3681     if self.op.nicparams:
3682       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3683     if self.op.osparams:
3684       self.cluster.osparams = self.new_osp
3685     if self.op.ndparams:
3686       self.cluster.ndparams = self.new_ndparams
3687
3688     if self.op.candidate_pool_size is not None:
3689       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3690       # we need to update the pool size here, otherwise the save will fail
3691       _AdjustCandidatePool(self, [])
3692
3693     if self.op.maintain_node_health is not None:
3694       self.cluster.maintain_node_health = self.op.maintain_node_health
3695
3696     if self.op.prealloc_wipe_disks is not None:
3697       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3698
3699     if self.op.add_uids is not None:
3700       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3701
3702     if self.op.remove_uids is not None:
3703       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3704
3705     if self.op.uid_pool is not None:
3706       self.cluster.uid_pool = self.op.uid_pool
3707
3708     if self.op.default_iallocator is not None:
3709       self.cluster.default_iallocator = self.op.default_iallocator
3710
3711     if self.op.reserved_lvs is not None:
3712       self.cluster.reserved_lvs = self.op.reserved_lvs
3713
3714     if self.op.use_external_mip_script is not None:
3715       self.cluster.use_external_mip_script = self.op.use_external_mip_script
3716
3717     def helper_os(aname, mods, desc):
3718       desc += " OS list"
3719       lst = getattr(self.cluster, aname)
3720       for key, val in mods:
3721         if key == constants.DDM_ADD:
3722           if val in lst:
3723             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3724           else:
3725             lst.append(val)
3726         elif key == constants.DDM_REMOVE:
3727           if val in lst:
3728             lst.remove(val)
3729           else:
3730             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3731         else:
3732           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3733
3734     if self.op.hidden_os:
3735       helper_os("hidden_os", self.op.hidden_os, "hidden")
3736
3737     if self.op.blacklisted_os:
3738       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3739
3740     if self.op.master_netdev:
3741       master_params = self.cfg.GetMasterNetworkParameters()
3742       ems = self.cfg.GetUseExternalMipScript()
3743       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3744                   self.cluster.master_netdev)
3745       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3746                                                        master_params, ems)
3747       result.Raise("Could not disable the master ip")
3748       feedback_fn("Changing master_netdev from %s to %s" %
3749                   (master_params.netdev, self.op.master_netdev))
3750       self.cluster.master_netdev = self.op.master_netdev
3751
3752     if self.op.master_netmask:
3753       master_params = self.cfg.GetMasterNetworkParameters()
3754       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3755       result = self.rpc.call_node_change_master_netmask(master_params.name,
3756                                                         master_params.netmask,
3757                                                         self.op.master_netmask,
3758                                                         master_params.ip,
3759                                                         master_params.netdev)
3760       if result.fail_msg:
3761         msg = "Could not change the master IP netmask: %s" % result.fail_msg
3762         feedback_fn(msg)
3763
3764       self.cluster.master_netmask = self.op.master_netmask
3765
3766     self.cfg.Update(self.cluster, feedback_fn)
3767
3768     if self.op.master_netdev:
3769       master_params = self.cfg.GetMasterNetworkParameters()
3770       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3771                   self.op.master_netdev)
3772       ems = self.cfg.GetUseExternalMipScript()
3773       result = self.rpc.call_node_activate_master_ip(master_params.name,
3774                                                      master_params, ems)
3775       if result.fail_msg:
3776         self.LogWarning("Could not re-enable the master ip on"
3777                         " the master, please restart manually: %s",
3778                         result.fail_msg)
3779
3780
3781 def _UploadHelper(lu, nodes, fname):
3782   """Helper for uploading a file and showing warnings.
3783
3784   """
3785   if os.path.exists(fname):
3786     result = lu.rpc.call_upload_file(nodes, fname)
3787     for to_node, to_result in result.items():
3788       msg = to_result.fail_msg
3789       if msg:
3790         msg = ("Copy of file %s to node %s failed: %s" %
3791                (fname, to_node, msg))
3792         lu.proc.LogWarning(msg)
3793
3794
3795 def _ComputeAncillaryFiles(cluster, redist):
3796   """Compute files external to Ganeti which need to be consistent.
3797
3798   @type redist: boolean
3799   @param redist: Whether to include files which need to be redistributed
3800
3801   """
3802   # Compute files for all nodes
3803   files_all = set([
3804     constants.SSH_KNOWN_HOSTS_FILE,
3805     constants.CONFD_HMAC_KEY,
3806     constants.CLUSTER_DOMAIN_SECRET_FILE,
3807     constants.SPICE_CERT_FILE,
3808     constants.SPICE_CACERT_FILE,
3809     constants.RAPI_USERS_FILE,
3810     ])
3811
3812   if not redist:
3813     files_all.update(constants.ALL_CERT_FILES)
3814     files_all.update(ssconf.SimpleStore().GetFileList())
3815   else:
3816     # we need to ship at least the RAPI certificate
3817     files_all.add(constants.RAPI_CERT_FILE)
3818
3819   if cluster.modify_etc_hosts:
3820     files_all.add(constants.ETC_HOSTS)
3821
3822   # Files which are optional, these must:
3823   # - be present in one other category as well
3824   # - either exist or not exist on all nodes of that category (mc, vm all)
3825   files_opt = set([
3826     constants.RAPI_USERS_FILE,
3827     ])
3828
3829   # Files which should only be on master candidates
3830   files_mc = set()
3831
3832   if not redist:
3833     files_mc.add(constants.CLUSTER_CONF_FILE)
3834
3835     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3836     # replication
3837     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3838
3839   # Files which should only be on VM-capable nodes
3840   files_vm = set(filename
3841     for hv_name in cluster.enabled_hypervisors
3842     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3843
3844   files_opt |= set(filename
3845     for hv_name in cluster.enabled_hypervisors
3846     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3847
3848   # Filenames in each category must be unique
3849   all_files_set = files_all | files_mc | files_vm
3850   assert (len(all_files_set) ==
3851           sum(map(len, [files_all, files_mc, files_vm]))), \
3852          "Found file listed in more than one file list"
3853
3854   # Optional files must be present in one other category
3855   assert all_files_set.issuperset(files_opt), \
3856          "Optional file not in a different required list"
3857
3858   return (files_all, files_opt, files_mc, files_vm)
3859
3860
3861 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3862   """Distribute additional files which are part of the cluster configuration.
3863
3864   ConfigWriter takes care of distributing the config and ssconf files, but
3865   there are more files which should be distributed to all nodes. This function
3866   makes sure those are copied.
3867
3868   @param lu: calling logical unit
3869   @param additional_nodes: list of nodes not in the config to distribute to
3870   @type additional_vm: boolean
3871   @param additional_vm: whether the additional nodes are vm-capable or not
3872
3873   """
3874   # Gather target nodes
3875   cluster = lu.cfg.GetClusterInfo()
3876   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3877
3878   online_nodes = lu.cfg.GetOnlineNodeList()
3879   vm_nodes = lu.cfg.GetVmCapableNodeList()
3880
3881   if additional_nodes is not None:
3882     online_nodes.extend(additional_nodes)
3883     if additional_vm:
3884       vm_nodes.extend(additional_nodes)
3885
3886   # Never distribute to master node
3887   for nodelist in [online_nodes, vm_nodes]:
3888     if master_info.name in nodelist:
3889       nodelist.remove(master_info.name)
3890
3891   # Gather file lists
3892   (files_all, _, files_mc, files_vm) = \
3893     _ComputeAncillaryFiles(cluster, True)
3894
3895   # Never re-distribute configuration file from here
3896   assert not (constants.CLUSTER_CONF_FILE in files_all or
3897               constants.CLUSTER_CONF_FILE in files_vm)
3898   assert not files_mc, "Master candidates not handled in this function"
3899
3900   filemap = [
3901     (online_nodes, files_all),
3902     (vm_nodes, files_vm),
3903     ]
3904
3905   # Upload the files
3906   for (node_list, files) in filemap:
3907     for fname in files:
3908       _UploadHelper(lu, node_list, fname)
3909
3910
3911 class LUClusterRedistConf(NoHooksLU):
3912   """Force the redistribution of cluster configuration.
3913
3914   This is a very simple LU.
3915
3916   """
3917   REQ_BGL = False
3918
3919   def ExpandNames(self):
3920     self.needed_locks = {
3921       locking.LEVEL_NODE: locking.ALL_SET,
3922     }
3923     self.share_locks[locking.LEVEL_NODE] = 1
3924
3925   def Exec(self, feedback_fn):
3926     """Redistribute the configuration.
3927
3928     """
3929     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3930     _RedistributeAncillaryFiles(self)
3931
3932
3933 class LUClusterActivateMasterIp(NoHooksLU):
3934   """Activate the master IP on the master node.
3935
3936   """
3937   def Exec(self, feedback_fn):
3938     """Activate the master IP.
3939
3940     """
3941     master_params = self.cfg.GetMasterNetworkParameters()
3942     ems = self.cfg.GetUseExternalMipScript()
3943     self.rpc.call_node_activate_master_ip(master_params.name,
3944                                           master_params, ems)
3945
3946
3947 class LUClusterDeactivateMasterIp(NoHooksLU):
3948   """Deactivate the master IP on the master node.
3949
3950   """
3951   def Exec(self, feedback_fn):
3952     """Deactivate the master IP.
3953
3954     """
3955     master_params = self.cfg.GetMasterNetworkParameters()
3956     ems = self.cfg.GetUseExternalMipScript()
3957     self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
3958                                             ems)
3959
3960
3961 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3962   """Sleep and poll for an instance's disk to sync.
3963
3964   """
3965   if not instance.disks or disks is not None and not disks:
3966     return True
3967
3968   disks = _ExpandCheckDisks(instance, disks)
3969
3970   if not oneshot:
3971     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3972
3973   node = instance.primary_node
3974
3975   for dev in disks:
3976     lu.cfg.SetDiskID(dev, node)
3977
3978   # TODO: Convert to utils.Retry
3979
3980   retries = 0
3981   degr_retries = 10 # in seconds, as we sleep 1 second each time
3982   while True:
3983     max_time = 0
3984     done = True
3985     cumul_degraded = False
3986     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3987     msg = rstats.fail_msg
3988     if msg:
3989       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3990       retries += 1
3991       if retries >= 10:
3992         raise errors.RemoteError("Can't contact node %s for mirror data,"
3993                                  " aborting." % node)
3994       time.sleep(6)
3995       continue
3996     rstats = rstats.payload
3997     retries = 0
3998     for i, mstat in enumerate(rstats):
3999       if mstat is None:
4000         lu.LogWarning("Can't compute data for node %s/%s",
4001                            node, disks[i].iv_name)
4002         continue
4003
4004       cumul_degraded = (cumul_degraded or
4005                         (mstat.is_degraded and mstat.sync_percent is None))
4006       if mstat.sync_percent is not None:
4007         done = False
4008         if mstat.estimated_time is not None:
4009           rem_time = ("%s remaining (estimated)" %
4010                       utils.FormatSeconds(mstat.estimated_time))
4011           max_time = mstat.estimated_time
4012         else:
4013           rem_time = "no time estimate"
4014         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4015                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4016
4017     # if we're done but degraded, let's do a few small retries, to
4018     # make sure we see a stable and not transient situation; therefore
4019     # we force restart of the loop
4020     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4021       logging.info("Degraded disks found, %d retries left", degr_retries)
4022       degr_retries -= 1
4023       time.sleep(1)
4024       continue
4025
4026     if done or oneshot:
4027       break
4028
4029     time.sleep(min(60, max_time))
4030
4031   if done:
4032     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4033   return not cumul_degraded
4034
4035
4036 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4037   """Check that mirrors are not degraded.
4038
4039   The ldisk parameter, if True, will change the test from the
4040   is_degraded attribute (which represents overall non-ok status for
4041   the device(s)) to the ldisk (representing the local storage status).
4042
4043   """
4044   lu.cfg.SetDiskID(dev, node)
4045
4046   result = True
4047
4048   if on_primary or dev.AssembleOnSecondary():
4049     rstats = lu.rpc.call_blockdev_find(node, dev)
4050     msg = rstats.fail_msg
4051     if msg:
4052       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4053       result = False
4054     elif not rstats.payload:
4055       lu.LogWarning("Can't find disk on node %s", node)
4056       result = False
4057     else:
4058       if ldisk:
4059         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4060       else:
4061         result = result and not rstats.payload.is_degraded
4062
4063   if dev.children:
4064     for child in dev.children:
4065       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4066
4067   return result
4068
4069
4070 class LUOobCommand(NoHooksLU):
4071   """Logical unit for OOB handling.
4072
4073   """
4074   REG_BGL = False
4075   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4076
4077   def ExpandNames(self):
4078     """Gather locks we need.
4079
4080     """
4081     if self.op.node_names:
4082       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4083       lock_names = self.op.node_names
4084     else:
4085       lock_names = locking.ALL_SET
4086
4087     self.needed_locks = {
4088       locking.LEVEL_NODE: lock_names,
4089       }
4090
4091   def CheckPrereq(self):
4092     """Check prerequisites.
4093
4094     This checks:
4095      - the node exists in the configuration
4096      - OOB is supported
4097
4098     Any errors are signaled by raising errors.OpPrereqError.
4099
4100     """
4101     self.nodes = []
4102     self.master_node = self.cfg.GetMasterNode()
4103
4104     assert self.op.power_delay >= 0.0
4105
4106     if self.op.node_names:
4107       if (self.op.command in self._SKIP_MASTER and
4108           self.master_node in self.op.node_names):
4109         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4110         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4111
4112         if master_oob_handler:
4113           additional_text = ("run '%s %s %s' if you want to operate on the"
4114                              " master regardless") % (master_oob_handler,
4115                                                       self.op.command,
4116                                                       self.master_node)
4117         else:
4118           additional_text = "it does not support out-of-band operations"
4119
4120         raise errors.OpPrereqError(("Operating on the master node %s is not"
4121                                     " allowed for %s; %s") %
4122                                    (self.master_node, self.op.command,
4123                                     additional_text), errors.ECODE_INVAL)
4124     else:
4125       self.op.node_names = self.cfg.GetNodeList()
4126       if self.op.command in self._SKIP_MASTER:
4127         self.op.node_names.remove(self.master_node)
4128
4129     if self.op.command in self._SKIP_MASTER:
4130       assert self.master_node not in self.op.node_names
4131
4132     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4133       if node is None:
4134         raise errors.OpPrereqError("Node %s not found" % node_name,
4135                                    errors.ECODE_NOENT)
4136       else:
4137         self.nodes.append(node)
4138
4139       if (not self.op.ignore_status and
4140           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4141         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4142                                     " not marked offline") % node_name,
4143                                    errors.ECODE_STATE)
4144
4145   def Exec(self, feedback_fn):
4146     """Execute OOB and return result if we expect any.
4147
4148     """
4149     master_node = self.master_node
4150     ret = []
4151
4152     for idx, node in enumerate(utils.NiceSort(self.nodes,
4153                                               key=lambda node: node.name)):
4154       node_entry = [(constants.RS_NORMAL, node.name)]
4155       ret.append(node_entry)
4156
4157       oob_program = _SupportsOob(self.cfg, node)
4158
4159       if not oob_program:
4160         node_entry.append((constants.RS_UNAVAIL, None))
4161         continue
4162
4163       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4164                    self.op.command, oob_program, node.name)
4165       result = self.rpc.call_run_oob(master_node, oob_program,
4166                                      self.op.command, node.name,
4167                                      self.op.timeout)
4168
4169       if result.fail_msg:
4170         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4171                         node.name, result.fail_msg)
4172         node_entry.append((constants.RS_NODATA, None))
4173       else:
4174         try:
4175           self._CheckPayload(result)
4176         except errors.OpExecError, err:
4177           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4178                           node.name, err)
4179           node_entry.append((constants.RS_NODATA, None))
4180         else:
4181           if self.op.command == constants.OOB_HEALTH:
4182             # For health we should log important events
4183             for item, status in result.payload:
4184               if status in [constants.OOB_STATUS_WARNING,
4185                             constants.OOB_STATUS_CRITICAL]:
4186                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4187                                 item, node.name, status)
4188
4189           if self.op.command == constants.OOB_POWER_ON:
4190             node.powered = True
4191           elif self.op.command == constants.OOB_POWER_OFF:
4192             node.powered = False
4193           elif self.op.command == constants.OOB_POWER_STATUS:
4194             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4195             if powered != node.powered:
4196               logging.warning(("Recorded power state (%s) of node '%s' does not"
4197                                " match actual power state (%s)"), node.powered,
4198                               node.name, powered)
4199
4200           # For configuration changing commands we should update the node
4201           if self.op.command in (constants.OOB_POWER_ON,
4202                                  constants.OOB_POWER_OFF):
4203             self.cfg.Update(node, feedback_fn)
4204
4205           node_entry.append((constants.RS_NORMAL, result.payload))
4206
4207           if (self.op.command == constants.OOB_POWER_ON and
4208               idx < len(self.nodes) - 1):
4209             time.sleep(self.op.power_delay)
4210
4211     return ret
4212
4213   def _CheckPayload(self, result):
4214     """Checks if the payload is valid.
4215
4216     @param result: RPC result
4217     @raises errors.OpExecError: If payload is not valid
4218
4219     """
4220     errs = []
4221     if self.op.command == constants.OOB_HEALTH:
4222       if not isinstance(result.payload, list):
4223         errs.append("command 'health' is expected to return a list but got %s" %
4224                     type(result.payload))
4225       else:
4226         for item, status in result.payload:
4227           if status not in constants.OOB_STATUSES:
4228             errs.append("health item '%s' has invalid status '%s'" %
4229                         (item, status))
4230
4231     if self.op.command == constants.OOB_POWER_STATUS:
4232       if not isinstance(result.payload, dict):
4233         errs.append("power-status is expected to return a dict but got %s" %
4234                     type(result.payload))
4235
4236     if self.op.command in [
4237         constants.OOB_POWER_ON,
4238         constants.OOB_POWER_OFF,
4239         constants.OOB_POWER_CYCLE,
4240         ]:
4241       if result.payload is not None:
4242         errs.append("%s is expected to not return payload but got '%s'" %
4243                     (self.op.command, result.payload))
4244
4245     if errs:
4246       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4247                                utils.CommaJoin(errs))
4248
4249
4250 class _OsQuery(_QueryBase):
4251   FIELDS = query.OS_FIELDS
4252
4253   def ExpandNames(self, lu):
4254     # Lock all nodes in shared mode
4255     # Temporary removal of locks, should be reverted later
4256     # TODO: reintroduce locks when they are lighter-weight
4257     lu.needed_locks = {}
4258     #self.share_locks[locking.LEVEL_NODE] = 1
4259     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4260
4261     # The following variables interact with _QueryBase._GetNames
4262     if self.names:
4263       self.wanted = self.names
4264     else:
4265       self.wanted = locking.ALL_SET
4266
4267     self.do_locking = self.use_locking
4268
4269   def DeclareLocks(self, lu, level):
4270     pass
4271
4272   @staticmethod
4273   def _DiagnoseByOS(rlist):
4274     """Remaps a per-node return list into an a per-os per-node dictionary
4275
4276     @param rlist: a map with node names as keys and OS objects as values
4277
4278     @rtype: dict
4279     @return: a dictionary with osnames as keys and as value another
4280         map, with nodes as keys and tuples of (path, status, diagnose,
4281         variants, parameters, api_versions) as values, eg::
4282
4283           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4284                                      (/srv/..., False, "invalid api")],
4285                            "node2": [(/srv/..., True, "", [], [])]}
4286           }
4287
4288     """
4289     all_os = {}
4290     # we build here the list of nodes that didn't fail the RPC (at RPC
4291     # level), so that nodes with a non-responding node daemon don't
4292     # make all OSes invalid
4293     good_nodes = [node_name for node_name in rlist
4294                   if not rlist[node_name].fail_msg]
4295     for node_name, nr in rlist.items():
4296       if nr.fail_msg or not nr.payload:
4297         continue
4298       for (name, path, status, diagnose, variants,
4299            params, api_versions) in nr.payload:
4300         if name not in all_os:
4301           # build a list of nodes for this os containing empty lists
4302           # for each node in node_list
4303           all_os[name] = {}
4304           for nname in good_nodes:
4305             all_os[name][nname] = []
4306         # convert params from [name, help] to (name, help)
4307         params = [tuple(v) for v in params]
4308         all_os[name][node_name].append((path, status, diagnose,
4309                                         variants, params, api_versions))
4310     return all_os
4311
4312   def _GetQueryData(self, lu):
4313     """Computes the list of nodes and their attributes.
4314
4315     """
4316     # Locking is not used
4317     assert not (compat.any(lu.glm.is_owned(level)
4318                            for level in locking.LEVELS
4319                            if level != locking.LEVEL_CLUSTER) or
4320                 self.do_locking or self.use_locking)
4321
4322     valid_nodes = [node.name
4323                    for node in lu.cfg.GetAllNodesInfo().values()
4324                    if not node.offline and node.vm_capable]
4325     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4326     cluster = lu.cfg.GetClusterInfo()
4327
4328     data = {}
4329
4330     for (os_name, os_data) in pol.items():
4331       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4332                           hidden=(os_name in cluster.hidden_os),
4333                           blacklisted=(os_name in cluster.blacklisted_os))
4334
4335       variants = set()
4336       parameters = set()
4337       api_versions = set()
4338
4339       for idx, osl in enumerate(os_data.values()):
4340         info.valid = bool(info.valid and osl and osl[0][1])
4341         if not info.valid:
4342           break
4343
4344         (node_variants, node_params, node_api) = osl[0][3:6]
4345         if idx == 0:
4346           # First entry
4347           variants.update(node_variants)
4348           parameters.update(node_params)
4349           api_versions.update(node_api)
4350         else:
4351           # Filter out inconsistent values
4352           variants.intersection_update(node_variants)
4353           parameters.intersection_update(node_params)
4354           api_versions.intersection_update(node_api)
4355
4356       info.variants = list(variants)
4357       info.parameters = list(parameters)
4358       info.api_versions = list(api_versions)
4359
4360       data[os_name] = info
4361
4362     # Prepare data in requested order
4363     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4364             if name in data]
4365
4366
4367 class LUOsDiagnose(NoHooksLU):
4368   """Logical unit for OS diagnose/query.
4369
4370   """
4371   REQ_BGL = False
4372
4373   @staticmethod
4374   def _BuildFilter(fields, names):
4375     """Builds a filter for querying OSes.
4376
4377     """
4378     name_filter = qlang.MakeSimpleFilter("name", names)
4379
4380     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4381     # respective field is not requested
4382     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4383                      for fname in ["hidden", "blacklisted"]
4384                      if fname not in fields]
4385     if "valid" not in fields:
4386       status_filter.append([qlang.OP_TRUE, "valid"])
4387
4388     if status_filter:
4389       status_filter.insert(0, qlang.OP_AND)
4390     else:
4391       status_filter = None
4392
4393     if name_filter and status_filter:
4394       return [qlang.OP_AND, name_filter, status_filter]
4395     elif name_filter:
4396       return name_filter
4397     else:
4398       return status_filter
4399
4400   def CheckArguments(self):
4401     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4402                        self.op.output_fields, False)
4403
4404   def ExpandNames(self):
4405     self.oq.ExpandNames(self)
4406
4407   def Exec(self, feedback_fn):
4408     return self.oq.OldStyleQuery(self)
4409
4410
4411 class LUNodeRemove(LogicalUnit):
4412   """Logical unit for removing a node.
4413
4414   """
4415   HPATH = "node-remove"
4416   HTYPE = constants.HTYPE_NODE
4417
4418   def BuildHooksEnv(self):
4419     """Build hooks env.
4420
4421     This doesn't run on the target node in the pre phase as a failed
4422     node would then be impossible to remove.
4423
4424     """
4425     return {
4426       "OP_TARGET": self.op.node_name,
4427       "NODE_NAME": self.op.node_name,
4428       }
4429
4430   def BuildHooksNodes(self):
4431     """Build hooks nodes.
4432
4433     """
4434     all_nodes = self.cfg.GetNodeList()
4435     try:
4436       all_nodes.remove(self.op.node_name)
4437     except ValueError:
4438       logging.warning("Node '%s', which is about to be removed, was not found"
4439                       " in the list of all nodes", self.op.node_name)
4440     return (all_nodes, all_nodes)
4441
4442   def CheckPrereq(self):
4443     """Check prerequisites.
4444
4445     This checks:
4446      - the node exists in the configuration
4447      - it does not have primary or secondary instances
4448      - it's not the master
4449
4450     Any errors are signaled by raising errors.OpPrereqError.
4451
4452     """
4453     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4454     node = self.cfg.GetNodeInfo(self.op.node_name)
4455     assert node is not None
4456
4457     masternode = self.cfg.GetMasterNode()
4458     if node.name == masternode:
4459       raise errors.OpPrereqError("Node is the master node, failover to another"
4460                                  " node is required", errors.ECODE_INVAL)
4461
4462     for instance_name, instance in self.cfg.GetAllInstancesInfo():
4463       if node.name in instance.all_nodes:
4464         raise errors.OpPrereqError("Instance %s is still running on the node,"
4465                                    " please remove first" % instance_name,
4466                                    errors.ECODE_INVAL)
4467     self.op.node_name = node.name
4468     self.node = node
4469
4470   def Exec(self, feedback_fn):
4471     """Removes the node from the cluster.
4472
4473     """
4474     node = self.node
4475     logging.info("Stopping the node daemon and removing configs from node %s",
4476                  node.name)
4477
4478     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4479
4480     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4481       "Not owning BGL"
4482
4483     # Promote nodes to master candidate as needed
4484     _AdjustCandidatePool(self, exceptions=[node.name])
4485     self.context.RemoveNode(node.name)
4486
4487     # Run post hooks on the node before it's removed
4488     _RunPostHook(self, node.name)
4489
4490     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4491     msg = result.fail_msg
4492     if msg:
4493       self.LogWarning("Errors encountered on the remote node while leaving"
4494                       " the cluster: %s", msg)
4495
4496     # Remove node from our /etc/hosts
4497     if self.cfg.GetClusterInfo().modify_etc_hosts:
4498       master_node = self.cfg.GetMasterNode()
4499       result = self.rpc.call_etc_hosts_modify(master_node,
4500                                               constants.ETC_HOSTS_REMOVE,
4501                                               node.name, None)
4502       result.Raise("Can't update hosts file with new host data")
4503       _RedistributeAncillaryFiles(self)
4504
4505
4506 class _NodeQuery(_QueryBase):
4507   FIELDS = query.NODE_FIELDS
4508
4509   def ExpandNames(self, lu):
4510     lu.needed_locks = {}
4511     lu.share_locks = _ShareAll()
4512
4513     if self.names:
4514       self.wanted = _GetWantedNodes(lu, self.names)
4515     else:
4516       self.wanted = locking.ALL_SET
4517
4518     self.do_locking = (self.use_locking and
4519                        query.NQ_LIVE in self.requested_data)
4520
4521     if self.do_locking:
4522       # If any non-static field is requested we need to lock the nodes
4523       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4524
4525   def DeclareLocks(self, lu, level):
4526     pass
4527
4528   def _GetQueryData(self, lu):
4529     """Computes the list of nodes and their attributes.
4530
4531     """
4532     all_info = lu.cfg.GetAllNodesInfo()
4533
4534     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4535
4536     # Gather data as requested
4537     if query.NQ_LIVE in self.requested_data:
4538       # filter out non-vm_capable nodes
4539       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4540
4541       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4542                                         lu.cfg.GetHypervisorType())
4543       live_data = dict((name, nresult.payload)
4544                        for (name, nresult) in node_data.items()
4545                        if not nresult.fail_msg and nresult.payload)
4546     else:
4547       live_data = None
4548
4549     if query.NQ_INST in self.requested_data:
4550       node_to_primary = dict([(name, set()) for name in nodenames])
4551       node_to_secondary = dict([(name, set()) for name in nodenames])
4552
4553       inst_data = lu.cfg.GetAllInstancesInfo()
4554
4555       for inst in inst_data.values():
4556         if inst.primary_node in node_to_primary:
4557           node_to_primary[inst.primary_node].add(inst.name)
4558         for secnode in inst.secondary_nodes:
4559           if secnode in node_to_secondary:
4560             node_to_secondary[secnode].add(inst.name)
4561     else:
4562       node_to_primary = None
4563       node_to_secondary = None
4564
4565     if query.NQ_OOB in self.requested_data:
4566       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4567                          for name, node in all_info.iteritems())
4568     else:
4569       oob_support = None
4570
4571     if query.NQ_GROUP in self.requested_data:
4572       groups = lu.cfg.GetAllNodeGroupsInfo()
4573     else:
4574       groups = {}
4575
4576     return query.NodeQueryData([all_info[name] for name in nodenames],
4577                                live_data, lu.cfg.GetMasterNode(),
4578                                node_to_primary, node_to_secondary, groups,
4579                                oob_support, lu.cfg.GetClusterInfo())
4580
4581
4582 class LUNodeQuery(NoHooksLU):
4583   """Logical unit for querying nodes.
4584
4585   """
4586   # pylint: disable=W0142
4587   REQ_BGL = False
4588
4589   def CheckArguments(self):
4590     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4591                          self.op.output_fields, self.op.use_locking)
4592
4593   def ExpandNames(self):
4594     self.nq.ExpandNames(self)
4595
4596   def DeclareLocks(self, level):
4597     self.nq.DeclareLocks(self, level)
4598
4599   def Exec(self, feedback_fn):
4600     return self.nq.OldStyleQuery(self)
4601
4602
4603 class LUNodeQueryvols(NoHooksLU):
4604   """Logical unit for getting volumes on node(s).
4605
4606   """
4607   REQ_BGL = False
4608   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4609   _FIELDS_STATIC = utils.FieldSet("node")
4610
4611   def CheckArguments(self):
4612     _CheckOutputFields(static=self._FIELDS_STATIC,
4613                        dynamic=self._FIELDS_DYNAMIC,
4614                        selected=self.op.output_fields)
4615
4616   def ExpandNames(self):
4617     self.share_locks = _ShareAll()
4618     self.needed_locks = {}
4619
4620     if not self.op.nodes:
4621       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4622     else:
4623       self.needed_locks[locking.LEVEL_NODE] = \
4624         _GetWantedNodes(self, self.op.nodes)
4625
4626   def Exec(self, feedback_fn):
4627     """Computes the list of nodes and their attributes.
4628
4629     """
4630     nodenames = self.owned_locks(locking.LEVEL_NODE)
4631     volumes = self.rpc.call_node_volumes(nodenames)
4632
4633     ilist = self.cfg.GetAllInstancesInfo()
4634     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4635
4636     output = []
4637     for node in nodenames:
4638       nresult = volumes[node]
4639       if nresult.offline:
4640         continue
4641       msg = nresult.fail_msg
4642       if msg:
4643         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4644         continue
4645
4646       node_vols = sorted(nresult.payload,
4647                          key=operator.itemgetter("dev"))
4648
4649       for vol in node_vols:
4650         node_output = []
4651         for field in self.op.output_fields:
4652           if field == "node":
4653             val = node
4654           elif field == "phys":
4655             val = vol["dev"]
4656           elif field == "vg":
4657             val = vol["vg"]
4658           elif field == "name":
4659             val = vol["name"]
4660           elif field == "size":
4661             val = int(float(vol["size"]))
4662           elif field == "instance":
4663             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4664           else:
4665             raise errors.ParameterError(field)
4666           node_output.append(str(val))
4667
4668         output.append(node_output)
4669
4670     return output
4671
4672
4673 class LUNodeQueryStorage(NoHooksLU):
4674   """Logical unit for getting information on storage units on node(s).
4675
4676   """
4677   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4678   REQ_BGL = False
4679
4680   def CheckArguments(self):
4681     _CheckOutputFields(static=self._FIELDS_STATIC,
4682                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4683                        selected=self.op.output_fields)
4684
4685   def ExpandNames(self):
4686     self.share_locks = _ShareAll()
4687     self.needed_locks = {}
4688
4689     if self.op.nodes:
4690       self.needed_locks[locking.LEVEL_NODE] = \
4691         _GetWantedNodes(self, self.op.nodes)
4692     else:
4693       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4694
4695   def Exec(self, feedback_fn):
4696     """Computes the list of nodes and their attributes.
4697
4698     """
4699     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4700
4701     # Always get name to sort by
4702     if constants.SF_NAME in self.op.output_fields:
4703       fields = self.op.output_fields[:]
4704     else:
4705       fields = [constants.SF_NAME] + self.op.output_fields
4706
4707     # Never ask for node or type as it's only known to the LU
4708     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4709       while extra in fields:
4710         fields.remove(extra)
4711
4712     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4713     name_idx = field_idx[constants.SF_NAME]
4714
4715     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4716     data = self.rpc.call_storage_list(self.nodes,
4717                                       self.op.storage_type, st_args,
4718                                       self.op.name, fields)
4719
4720     result = []
4721
4722     for node in utils.NiceSort(self.nodes):
4723       nresult = data[node]
4724       if nresult.offline:
4725         continue
4726
4727       msg = nresult.fail_msg
4728       if msg:
4729         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4730         continue
4731
4732       rows = dict([(row[name_idx], row) for row in nresult.payload])
4733
4734       for name in utils.NiceSort(rows.keys()):
4735         row = rows[name]
4736
4737         out = []
4738
4739         for field in self.op.output_fields:
4740           if field == constants.SF_NODE:
4741             val = node
4742           elif field == constants.SF_TYPE:
4743             val = self.op.storage_type
4744           elif field in field_idx:
4745             val = row[field_idx[field]]
4746           else:
4747             raise errors.ParameterError(field)
4748
4749           out.append(val)
4750
4751         result.append(out)
4752
4753     return result
4754
4755
4756 class _InstanceQuery(_QueryBase):
4757   FIELDS = query.INSTANCE_FIELDS
4758
4759   def ExpandNames(self, lu):
4760     lu.needed_locks = {}
4761     lu.share_locks = _ShareAll()
4762
4763     if self.names:
4764       self.wanted = _GetWantedInstances(lu, self.names)
4765     else:
4766       self.wanted = locking.ALL_SET
4767
4768     self.do_locking = (self.use_locking and
4769                        query.IQ_LIVE in self.requested_data)
4770     if self.do_locking:
4771       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4772       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4773       lu.needed_locks[locking.LEVEL_NODE] = []
4774       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4775
4776     self.do_grouplocks = (self.do_locking and
4777                           query.IQ_NODES in self.requested_data)
4778
4779   def DeclareLocks(self, lu, level):
4780     if self.do_locking:
4781       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4782         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4783
4784         # Lock all groups used by instances optimistically; this requires going
4785         # via the node before it's locked, requiring verification later on
4786         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4787           set(group_uuid
4788               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4789               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4790       elif level == locking.LEVEL_NODE:
4791         lu._LockInstancesNodes() # pylint: disable=W0212
4792
4793   @staticmethod
4794   def _CheckGroupLocks(lu):
4795     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4796     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4797
4798     # Check if node groups for locked instances are still correct
4799     for instance_name in owned_instances:
4800       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4801
4802   def _GetQueryData(self, lu):
4803     """Computes the list of instances and their attributes.
4804
4805     """
4806     if self.do_grouplocks:
4807       self._CheckGroupLocks(lu)
4808
4809     cluster = lu.cfg.GetClusterInfo()
4810     all_info = lu.cfg.GetAllInstancesInfo()
4811
4812     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4813
4814     instance_list = [all_info[name] for name in instance_names]
4815     nodes = frozenset(itertools.chain(*(inst.all_nodes
4816                                         for inst in instance_list)))
4817     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4818     bad_nodes = []
4819     offline_nodes = []
4820     wrongnode_inst = set()
4821
4822     # Gather data as requested
4823     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4824       live_data = {}
4825       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4826       for name in nodes:
4827         result = node_data[name]
4828         if result.offline:
4829           # offline nodes will be in both lists
4830           assert result.fail_msg
4831           offline_nodes.append(name)
4832         if result.fail_msg:
4833           bad_nodes.append(name)
4834         elif result.payload:
4835           for inst in result.payload:
4836             if inst in all_info:
4837               if all_info[inst].primary_node == name:
4838                 live_data.update(result.payload)
4839               else:
4840                 wrongnode_inst.add(inst)
4841             else:
4842               # orphan instance; we don't list it here as we don't
4843               # handle this case yet in the output of instance listing
4844               logging.warning("Orphan instance '%s' found on node %s",
4845                               inst, name)
4846         # else no instance is alive
4847     else:
4848       live_data = {}
4849
4850     if query.IQ_DISKUSAGE in self.requested_data:
4851       disk_usage = dict((inst.name,
4852                          _ComputeDiskSize(inst.disk_template,
4853                                           [{constants.IDISK_SIZE: disk.size}
4854                                            for disk in inst.disks]))
4855                         for inst in instance_list)
4856     else:
4857       disk_usage = None
4858
4859     if query.IQ_CONSOLE in self.requested_data:
4860       consinfo = {}
4861       for inst in instance_list:
4862         if inst.name in live_data:
4863           # Instance is running
4864           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4865         else:
4866           consinfo[inst.name] = None
4867       assert set(consinfo.keys()) == set(instance_names)
4868     else:
4869       consinfo = None
4870
4871     if query.IQ_NODES in self.requested_data:
4872       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4873                                             instance_list)))
4874       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4875       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4876                     for uuid in set(map(operator.attrgetter("group"),
4877                                         nodes.values())))
4878     else:
4879       nodes = None
4880       groups = None
4881
4882     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4883                                    disk_usage, offline_nodes, bad_nodes,
4884                                    live_data, wrongnode_inst, consinfo,
4885                                    nodes, groups)
4886
4887
4888 class LUQuery(NoHooksLU):
4889   """Query for resources/items of a certain kind.
4890
4891   """
4892   # pylint: disable=W0142
4893   REQ_BGL = False
4894
4895   def CheckArguments(self):
4896     qcls = _GetQueryImplementation(self.op.what)
4897
4898     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4899
4900   def ExpandNames(self):
4901     self.impl.ExpandNames(self)
4902
4903   def DeclareLocks(self, level):
4904     self.impl.DeclareLocks(self, level)
4905
4906   def Exec(self, feedback_fn):
4907     return self.impl.NewStyleQuery(self)
4908
4909
4910 class LUQueryFields(NoHooksLU):
4911   """Query for resources/items of a certain kind.
4912
4913   """
4914   # pylint: disable=W0142
4915   REQ_BGL = False
4916
4917   def CheckArguments(self):
4918     self.qcls = _GetQueryImplementation(self.op.what)
4919
4920   def ExpandNames(self):
4921     self.needed_locks = {}
4922
4923   def Exec(self, feedback_fn):
4924     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4925
4926
4927 class LUNodeModifyStorage(NoHooksLU):
4928   """Logical unit for modifying a storage volume on a node.
4929
4930   """
4931   REQ_BGL = False
4932
4933   def CheckArguments(self):
4934     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4935
4936     storage_type = self.op.storage_type
4937
4938     try:
4939       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4940     except KeyError:
4941       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4942                                  " modified" % storage_type,
4943                                  errors.ECODE_INVAL)
4944
4945     diff = set(self.op.changes.keys()) - modifiable
4946     if diff:
4947       raise errors.OpPrereqError("The following fields can not be modified for"
4948                                  " storage units of type '%s': %r" %
4949                                  (storage_type, list(diff)),
4950                                  errors.ECODE_INVAL)
4951
4952   def ExpandNames(self):
4953     self.needed_locks = {
4954       locking.LEVEL_NODE: self.op.node_name,
4955       }
4956
4957   def Exec(self, feedback_fn):
4958     """Computes the list of nodes and their attributes.
4959
4960     """
4961     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4962     result = self.rpc.call_storage_modify(self.op.node_name,
4963                                           self.op.storage_type, st_args,
4964                                           self.op.name, self.op.changes)
4965     result.Raise("Failed to modify storage unit '%s' on %s" %
4966                  (self.op.name, self.op.node_name))
4967
4968
4969 class LUNodeAdd(LogicalUnit):
4970   """Logical unit for adding node to the cluster.
4971
4972   """
4973   HPATH = "node-add"
4974   HTYPE = constants.HTYPE_NODE
4975   _NFLAGS = ["master_capable", "vm_capable"]
4976
4977   def CheckArguments(self):
4978     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4979     # validate/normalize the node name
4980     self.hostname = netutils.GetHostname(name=self.op.node_name,
4981                                          family=self.primary_ip_family)
4982     self.op.node_name = self.hostname.name
4983
4984     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4985       raise errors.OpPrereqError("Cannot readd the master node",
4986                                  errors.ECODE_STATE)
4987
4988     if self.op.readd and self.op.group:
4989       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4990                                  " being readded", errors.ECODE_INVAL)
4991
4992   def BuildHooksEnv(self):
4993     """Build hooks env.
4994
4995     This will run on all nodes before, and on all nodes + the new node after.
4996
4997     """
4998     return {
4999       "OP_TARGET": self.op.node_name,
5000       "NODE_NAME": self.op.node_name,
5001       "NODE_PIP": self.op.primary_ip,
5002       "NODE_SIP": self.op.secondary_ip,
5003       "MASTER_CAPABLE": str(self.op.master_capable),
5004       "VM_CAPABLE": str(self.op.vm_capable),
5005       }
5006
5007   def BuildHooksNodes(self):
5008     """Build hooks nodes.
5009
5010     """
5011     # Exclude added node
5012     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5013     post_nodes = pre_nodes + [self.op.node_name, ]
5014
5015     return (pre_nodes, post_nodes)
5016
5017   def CheckPrereq(self):
5018     """Check prerequisites.
5019
5020     This checks:
5021      - the new node is not already in the config
5022      - it is resolvable
5023      - its parameters (single/dual homed) matches the cluster
5024
5025     Any errors are signaled by raising errors.OpPrereqError.
5026
5027     """
5028     cfg = self.cfg
5029     hostname = self.hostname
5030     node = hostname.name
5031     primary_ip = self.op.primary_ip = hostname.ip
5032     if self.op.secondary_ip is None:
5033       if self.primary_ip_family == netutils.IP6Address.family:
5034         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5035                                    " IPv4 address must be given as secondary",
5036                                    errors.ECODE_INVAL)
5037       self.op.secondary_ip = primary_ip
5038
5039     secondary_ip = self.op.secondary_ip
5040     if not netutils.IP4Address.IsValid(secondary_ip):
5041       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5042                                  " address" % secondary_ip, errors.ECODE_INVAL)
5043
5044     node_list = cfg.GetNodeList()
5045     if not self.op.readd and node in node_list:
5046       raise errors.OpPrereqError("Node %s is already in the configuration" %
5047                                  node, errors.ECODE_EXISTS)
5048     elif self.op.readd and node not in node_list:
5049       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5050                                  errors.ECODE_NOENT)
5051
5052     self.changed_primary_ip = False
5053
5054     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5055       if self.op.readd and node == existing_node_name:
5056         if existing_node.secondary_ip != secondary_ip:
5057           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5058                                      " address configuration as before",
5059                                      errors.ECODE_INVAL)
5060         if existing_node.primary_ip != primary_ip:
5061           self.changed_primary_ip = True
5062
5063         continue
5064
5065       if (existing_node.primary_ip == primary_ip or
5066           existing_node.secondary_ip == primary_ip or
5067           existing_node.primary_ip == secondary_ip or
5068           existing_node.secondary_ip == secondary_ip):
5069         raise errors.OpPrereqError("New node ip address(es) conflict with"
5070                                    " existing node %s" % existing_node.name,
5071                                    errors.ECODE_NOTUNIQUE)
5072
5073     # After this 'if' block, None is no longer a valid value for the
5074     # _capable op attributes
5075     if self.op.readd:
5076       old_node = self.cfg.GetNodeInfo(node)
5077       assert old_node is not None, "Can't retrieve locked node %s" % node
5078       for attr in self._NFLAGS:
5079         if getattr(self.op, attr) is None:
5080           setattr(self.op, attr, getattr(old_node, attr))
5081     else:
5082       for attr in self._NFLAGS:
5083         if getattr(self.op, attr) is None:
5084           setattr(self.op, attr, True)
5085
5086     if self.op.readd and not self.op.vm_capable:
5087       pri, sec = cfg.GetNodeInstances(node)
5088       if pri or sec:
5089         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5090                                    " flag set to false, but it already holds"
5091                                    " instances" % node,
5092                                    errors.ECODE_STATE)
5093
5094     # check that the type of the node (single versus dual homed) is the
5095     # same as for the master
5096     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5097     master_singlehomed = myself.secondary_ip == myself.primary_ip
5098     newbie_singlehomed = secondary_ip == primary_ip
5099     if master_singlehomed != newbie_singlehomed:
5100       if master_singlehomed:
5101         raise errors.OpPrereqError("The master has no secondary ip but the"
5102                                    " new node has one",
5103                                    errors.ECODE_INVAL)
5104       else:
5105         raise errors.OpPrereqError("The master has a secondary ip but the"
5106                                    " new node doesn't have one",
5107                                    errors.ECODE_INVAL)
5108
5109     # checks reachability
5110     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5111       raise errors.OpPrereqError("Node not reachable by ping",
5112                                  errors.ECODE_ENVIRON)
5113
5114     if not newbie_singlehomed:
5115       # check reachability from my secondary ip to newbie's secondary ip
5116       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5117                            source=myself.secondary_ip):
5118         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5119                                    " based ping to node daemon port",
5120                                    errors.ECODE_ENVIRON)
5121
5122     if self.op.readd:
5123       exceptions = [node]
5124     else:
5125       exceptions = []
5126
5127     if self.op.master_capable:
5128       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5129     else:
5130       self.master_candidate = False
5131
5132     if self.op.readd:
5133       self.new_node = old_node
5134     else:
5135       node_group = cfg.LookupNodeGroup(self.op.group)
5136       self.new_node = objects.Node(name=node,
5137                                    primary_ip=primary_ip,
5138                                    secondary_ip=secondary_ip,
5139                                    master_candidate=self.master_candidate,
5140                                    offline=False, drained=False,
5141                                    group=node_group)
5142
5143     if self.op.ndparams:
5144       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5145
5146   def Exec(self, feedback_fn):
5147     """Adds the new node to the cluster.
5148
5149     """
5150     new_node = self.new_node
5151     node = new_node.name
5152
5153     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5154       "Not owning BGL"
5155
5156     # We adding a new node so we assume it's powered
5157     new_node.powered = True
5158
5159     # for re-adds, reset the offline/drained/master-candidate flags;
5160     # we need to reset here, otherwise offline would prevent RPC calls
5161     # later in the procedure; this also means that if the re-add
5162     # fails, we are left with a non-offlined, broken node
5163     if self.op.readd:
5164       new_node.drained = new_node.offline = False # pylint: disable=W0201
5165       self.LogInfo("Readding a node, the offline/drained flags were reset")
5166       # if we demote the node, we do cleanup later in the procedure
5167       new_node.master_candidate = self.master_candidate
5168       if self.changed_primary_ip:
5169         new_node.primary_ip = self.op.primary_ip
5170
5171     # copy the master/vm_capable flags
5172     for attr in self._NFLAGS:
5173       setattr(new_node, attr, getattr(self.op, attr))
5174
5175     # notify the user about any possible mc promotion
5176     if new_node.master_candidate:
5177       self.LogInfo("Node will be a master candidate")
5178
5179     if self.op.ndparams:
5180       new_node.ndparams = self.op.ndparams
5181     else:
5182       new_node.ndparams = {}
5183
5184     # check connectivity
5185     result = self.rpc.call_version([node])[node]
5186     result.Raise("Can't get version information from node %s" % node)
5187     if constants.PROTOCOL_VERSION == result.payload:
5188       logging.info("Communication to node %s fine, sw version %s match",
5189                    node, result.payload)
5190     else:
5191       raise errors.OpExecError("Version mismatch master version %s,"
5192                                " node version %s" %
5193                                (constants.PROTOCOL_VERSION, result.payload))
5194
5195     # Add node to our /etc/hosts, and add key to known_hosts
5196     if self.cfg.GetClusterInfo().modify_etc_hosts:
5197       master_node = self.cfg.GetMasterNode()
5198       result = self.rpc.call_etc_hosts_modify(master_node,
5199                                               constants.ETC_HOSTS_ADD,
5200                                               self.hostname.name,
5201                                               self.hostname.ip)
5202       result.Raise("Can't update hosts file with new host data")
5203
5204     if new_node.secondary_ip != new_node.primary_ip:
5205       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5206                                False)
5207
5208     node_verify_list = [self.cfg.GetMasterNode()]
5209     node_verify_param = {
5210       constants.NV_NODELIST: ([node], {}),
5211       # TODO: do a node-net-test as well?
5212     }
5213
5214     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5215                                        self.cfg.GetClusterName())
5216     for verifier in node_verify_list:
5217       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5218       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5219       if nl_payload:
5220         for failed in nl_payload:
5221           feedback_fn("ssh/hostname verification failed"
5222                       " (checking from %s): %s" %
5223                       (verifier, nl_payload[failed]))
5224         raise errors.OpExecError("ssh/hostname verification failed")
5225
5226     if self.op.readd:
5227       _RedistributeAncillaryFiles(self)
5228       self.context.ReaddNode(new_node)
5229       # make sure we redistribute the config
5230       self.cfg.Update(new_node, feedback_fn)
5231       # and make sure the new node will not have old files around
5232       if not new_node.master_candidate:
5233         result = self.rpc.call_node_demote_from_mc(new_node.name)
5234         msg = result.fail_msg
5235         if msg:
5236           self.LogWarning("Node failed to demote itself from master"
5237                           " candidate status: %s" % msg)
5238     else:
5239       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5240                                   additional_vm=self.op.vm_capable)
5241       self.context.AddNode(new_node, self.proc.GetECId())
5242
5243
5244 class LUNodeSetParams(LogicalUnit):
5245   """Modifies the parameters of a node.
5246
5247   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5248       to the node role (as _ROLE_*)
5249   @cvar _R2F: a dictionary from node role to tuples of flags
5250   @cvar _FLAGS: a list of attribute names corresponding to the flags
5251
5252   """
5253   HPATH = "node-modify"
5254   HTYPE = constants.HTYPE_NODE
5255   REQ_BGL = False
5256   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5257   _F2R = {
5258     (True, False, False): _ROLE_CANDIDATE,
5259     (False, True, False): _ROLE_DRAINED,
5260     (False, False, True): _ROLE_OFFLINE,
5261     (False, False, False): _ROLE_REGULAR,
5262     }
5263   _R2F = dict((v, k) for k, v in _F2R.items())
5264   _FLAGS = ["master_candidate", "drained", "offline"]
5265
5266   def CheckArguments(self):
5267     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5268     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5269                 self.op.master_capable, self.op.vm_capable,
5270                 self.op.secondary_ip, self.op.ndparams]
5271     if all_mods.count(None) == len(all_mods):
5272       raise errors.OpPrereqError("Please pass at least one modification",
5273                                  errors.ECODE_INVAL)
5274     if all_mods.count(True) > 1:
5275       raise errors.OpPrereqError("Can't set the node into more than one"
5276                                  " state at the same time",
5277                                  errors.ECODE_INVAL)
5278
5279     # Boolean value that tells us whether we might be demoting from MC
5280     self.might_demote = (self.op.master_candidate == False or
5281                          self.op.offline == True or
5282                          self.op.drained == True or
5283                          self.op.master_capable == False)
5284
5285     if self.op.secondary_ip:
5286       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5287         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5288                                    " address" % self.op.secondary_ip,
5289                                    errors.ECODE_INVAL)
5290
5291     self.lock_all = self.op.auto_promote and self.might_demote
5292     self.lock_instances = self.op.secondary_ip is not None
5293
5294   def _InstanceFilter(self, instance):
5295     """Filter for getting affected instances.
5296
5297     """
5298     return (instance.disk_template in constants.DTS_INT_MIRROR and
5299             self.op.node_name in instance.all_nodes)
5300
5301   def ExpandNames(self):
5302     if self.lock_all:
5303       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5304     else:
5305       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5306
5307     if self.lock_instances:
5308       self.needed_locks[locking.LEVEL_INSTANCE] = \
5309         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5310
5311   def BuildHooksEnv(self):
5312     """Build hooks env.
5313
5314     This runs on the master node.
5315
5316     """
5317     return {
5318       "OP_TARGET": self.op.node_name,
5319       "MASTER_CANDIDATE": str(self.op.master_candidate),
5320       "OFFLINE": str(self.op.offline),
5321       "DRAINED": str(self.op.drained),
5322       "MASTER_CAPABLE": str(self.op.master_capable),
5323       "VM_CAPABLE": str(self.op.vm_capable),
5324       }
5325
5326   def BuildHooksNodes(self):
5327     """Build hooks nodes.
5328
5329     """
5330     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5331     return (nl, nl)
5332
5333   def CheckPrereq(self):
5334     """Check prerequisites.
5335
5336     This only checks the instance list against the existing names.
5337
5338     """
5339     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5340
5341     if self.lock_instances:
5342       affected_instances = \
5343         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5344
5345       # Verify instance locks
5346       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5347       wanted_instances = frozenset(affected_instances.keys())
5348       if wanted_instances - owned_instances:
5349         raise errors.OpPrereqError("Instances affected by changing node %s's"
5350                                    " secondary IP address have changed since"
5351                                    " locks were acquired, wanted '%s', have"
5352                                    " '%s'; retry the operation" %
5353                                    (self.op.node_name,
5354                                     utils.CommaJoin(wanted_instances),
5355                                     utils.CommaJoin(owned_instances)),
5356                                    errors.ECODE_STATE)
5357     else:
5358       affected_instances = None
5359
5360     if (self.op.master_candidate is not None or
5361         self.op.drained is not None or
5362         self.op.offline is not None):
5363       # we can't change the master's node flags
5364       if self.op.node_name == self.cfg.GetMasterNode():
5365         raise errors.OpPrereqError("The master role can be changed"
5366                                    " only via master-failover",
5367                                    errors.ECODE_INVAL)
5368
5369     if self.op.master_candidate and not node.master_capable:
5370       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5371                                  " it a master candidate" % node.name,
5372                                  errors.ECODE_STATE)
5373
5374     if self.op.vm_capable == False:
5375       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5376       if ipri or isec:
5377         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5378                                    " the vm_capable flag" % node.name,
5379                                    errors.ECODE_STATE)
5380
5381     if node.master_candidate and self.might_demote and not self.lock_all:
5382       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5383       # check if after removing the current node, we're missing master
5384       # candidates
5385       (mc_remaining, mc_should, _) = \
5386           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5387       if mc_remaining < mc_should:
5388         raise errors.OpPrereqError("Not enough master candidates, please"
5389                                    " pass auto promote option to allow"
5390                                    " promotion", errors.ECODE_STATE)
5391
5392     self.old_flags = old_flags = (node.master_candidate,
5393                                   node.drained, node.offline)
5394     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5395     self.old_role = old_role = self._F2R[old_flags]
5396
5397     # Check for ineffective changes
5398     for attr in self._FLAGS:
5399       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5400         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5401         setattr(self.op, attr, None)
5402
5403     # Past this point, any flag change to False means a transition
5404     # away from the respective state, as only real changes are kept
5405
5406     # TODO: We might query the real power state if it supports OOB
5407     if _SupportsOob(self.cfg, node):
5408       if self.op.offline is False and not (node.powered or
5409                                            self.op.powered == True):
5410         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5411                                     " offline status can be reset") %
5412                                    self.op.node_name)
5413     elif self.op.powered is not None:
5414       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5415                                   " as it does not support out-of-band"
5416                                   " handling") % self.op.node_name)
5417
5418     # If we're being deofflined/drained, we'll MC ourself if needed
5419     if (self.op.drained == False or self.op.offline == False or
5420         (self.op.master_capable and not node.master_capable)):
5421       if _DecideSelfPromotion(self):
5422         self.op.master_candidate = True
5423         self.LogInfo("Auto-promoting node to master candidate")
5424
5425     # If we're no longer master capable, we'll demote ourselves from MC
5426     if self.op.master_capable == False and node.master_candidate:
5427       self.LogInfo("Demoting from master candidate")
5428       self.op.master_candidate = False
5429
5430     # Compute new role
5431     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5432     if self.op.master_candidate:
5433       new_role = self._ROLE_CANDIDATE
5434     elif self.op.drained:
5435       new_role = self._ROLE_DRAINED
5436     elif self.op.offline:
5437       new_role = self._ROLE_OFFLINE
5438     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5439       # False is still in new flags, which means we're un-setting (the
5440       # only) True flag
5441       new_role = self._ROLE_REGULAR
5442     else: # no new flags, nothing, keep old role
5443       new_role = old_role
5444
5445     self.new_role = new_role
5446
5447     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5448       # Trying to transition out of offline status
5449       # TODO: Use standard RPC runner, but make sure it works when the node is
5450       # still marked offline
5451       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5452       if result.fail_msg:
5453         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5454                                    " to report its version: %s" %
5455                                    (node.name, result.fail_msg),
5456                                    errors.ECODE_STATE)
5457       else:
5458         self.LogWarning("Transitioning node from offline to online state"
5459                         " without using re-add. Please make sure the node"
5460                         " is healthy!")
5461
5462     if self.op.secondary_ip:
5463       # Ok even without locking, because this can't be changed by any LU
5464       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5465       master_singlehomed = master.secondary_ip == master.primary_ip
5466       if master_singlehomed and self.op.secondary_ip:
5467         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5468                                    " homed cluster", errors.ECODE_INVAL)
5469
5470       assert not (frozenset(affected_instances) -
5471                   self.owned_locks(locking.LEVEL_INSTANCE))
5472
5473       if node.offline:
5474         if affected_instances:
5475           raise errors.OpPrereqError("Cannot change secondary IP address:"
5476                                      " offline node has instances (%s)"
5477                                      " configured to use it" %
5478                                      utils.CommaJoin(affected_instances.keys()))
5479       else:
5480         # On online nodes, check that no instances are running, and that
5481         # the node has the new ip and we can reach it.
5482         for instance in affected_instances.values():
5483           _CheckInstanceDown(self, instance, "cannot change secondary ip")
5484
5485         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5486         if master.name != node.name:
5487           # check reachability from master secondary ip to new secondary ip
5488           if not netutils.TcpPing(self.op.secondary_ip,
5489                                   constants.DEFAULT_NODED_PORT,
5490                                   source=master.secondary_ip):
5491             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5492                                        " based ping to node daemon port",
5493                                        errors.ECODE_ENVIRON)
5494
5495     if self.op.ndparams:
5496       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5497       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5498       self.new_ndparams = new_ndparams
5499
5500   def Exec(self, feedback_fn):
5501     """Modifies a node.
5502
5503     """
5504     node = self.node
5505     old_role = self.old_role
5506     new_role = self.new_role
5507
5508     result = []
5509
5510     if self.op.ndparams:
5511       node.ndparams = self.new_ndparams
5512
5513     if self.op.powered is not None:
5514       node.powered = self.op.powered
5515
5516     for attr in ["master_capable", "vm_capable"]:
5517       val = getattr(self.op, attr)
5518       if val is not None:
5519         setattr(node, attr, val)
5520         result.append((attr, str(val)))
5521
5522     if new_role != old_role:
5523       # Tell the node to demote itself, if no longer MC and not offline
5524       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5525         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5526         if msg:
5527           self.LogWarning("Node failed to demote itself: %s", msg)
5528
5529       new_flags = self._R2F[new_role]
5530       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5531         if of != nf:
5532           result.append((desc, str(nf)))
5533       (node.master_candidate, node.drained, node.offline) = new_flags
5534
5535       # we locked all nodes, we adjust the CP before updating this node
5536       if self.lock_all:
5537         _AdjustCandidatePool(self, [node.name])
5538
5539     if self.op.secondary_ip:
5540       node.secondary_ip = self.op.secondary_ip
5541       result.append(("secondary_ip", self.op.secondary_ip))
5542
5543     # this will trigger configuration file update, if needed
5544     self.cfg.Update(node, feedback_fn)
5545
5546     # this will trigger job queue propagation or cleanup if the mc
5547     # flag changed
5548     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5549       self.context.ReaddNode(node)
5550
5551     return result
5552
5553
5554 class LUNodePowercycle(NoHooksLU):
5555   """Powercycles a node.
5556
5557   """
5558   REQ_BGL = False
5559
5560   def CheckArguments(self):
5561     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5562     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5563       raise errors.OpPrereqError("The node is the master and the force"
5564                                  " parameter was not set",
5565                                  errors.ECODE_INVAL)
5566
5567   def ExpandNames(self):
5568     """Locking for PowercycleNode.
5569
5570     This is a last-resort option and shouldn't block on other
5571     jobs. Therefore, we grab no locks.
5572
5573     """
5574     self.needed_locks = {}
5575
5576   def Exec(self, feedback_fn):
5577     """Reboots a node.
5578
5579     """
5580     result = self.rpc.call_node_powercycle(self.op.node_name,
5581                                            self.cfg.GetHypervisorType())
5582     result.Raise("Failed to schedule the reboot")
5583     return result.payload
5584
5585
5586 class LUClusterQuery(NoHooksLU):
5587   """Query cluster configuration.
5588
5589   """
5590   REQ_BGL = False
5591
5592   def ExpandNames(self):
5593     self.needed_locks = {}
5594
5595   def Exec(self, feedback_fn):
5596     """Return cluster config.
5597
5598     """
5599     cluster = self.cfg.GetClusterInfo()
5600     os_hvp = {}
5601
5602     # Filter just for enabled hypervisors
5603     for os_name, hv_dict in cluster.os_hvp.items():
5604       os_hvp[os_name] = {}
5605       for hv_name, hv_params in hv_dict.items():
5606         if hv_name in cluster.enabled_hypervisors:
5607           os_hvp[os_name][hv_name] = hv_params
5608
5609     # Convert ip_family to ip_version
5610     primary_ip_version = constants.IP4_VERSION
5611     if cluster.primary_ip_family == netutils.IP6Address.family:
5612       primary_ip_version = constants.IP6_VERSION
5613
5614     result = {
5615       "software_version": constants.RELEASE_VERSION,
5616       "protocol_version": constants.PROTOCOL_VERSION,
5617       "config_version": constants.CONFIG_VERSION,
5618       "os_api_version": max(constants.OS_API_VERSIONS),
5619       "export_version": constants.EXPORT_VERSION,
5620       "architecture": (platform.architecture()[0], platform.machine()),
5621       "name": cluster.cluster_name,
5622       "master": cluster.master_node,
5623       "default_hypervisor": cluster.enabled_hypervisors[0],
5624       "enabled_hypervisors": cluster.enabled_hypervisors,
5625       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5626                         for hypervisor_name in cluster.enabled_hypervisors]),
5627       "os_hvp": os_hvp,
5628       "beparams": cluster.beparams,
5629       "osparams": cluster.osparams,
5630       "nicparams": cluster.nicparams,
5631       "ndparams": cluster.ndparams,
5632       "candidate_pool_size": cluster.candidate_pool_size,
5633       "master_netdev": cluster.master_netdev,
5634       "master_netmask": cluster.master_netmask,
5635       "use_external_mip_script": cluster.use_external_mip_script,
5636       "volume_group_name": cluster.volume_group_name,
5637       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5638       "file_storage_dir": cluster.file_storage_dir,
5639       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5640       "maintain_node_health": cluster.maintain_node_health,
5641       "ctime": cluster.ctime,
5642       "mtime": cluster.mtime,
5643       "uuid": cluster.uuid,
5644       "tags": list(cluster.GetTags()),
5645       "uid_pool": cluster.uid_pool,
5646       "default_iallocator": cluster.default_iallocator,
5647       "reserved_lvs": cluster.reserved_lvs,
5648       "primary_ip_version": primary_ip_version,
5649       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5650       "hidden_os": cluster.hidden_os,
5651       "blacklisted_os": cluster.blacklisted_os,
5652       }
5653
5654     return result
5655
5656
5657 class LUClusterConfigQuery(NoHooksLU):
5658   """Return configuration values.
5659
5660   """
5661   REQ_BGL = False
5662   _FIELDS_DYNAMIC = utils.FieldSet()
5663   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5664                                   "watcher_pause", "volume_group_name")
5665
5666   def CheckArguments(self):
5667     _CheckOutputFields(static=self._FIELDS_STATIC,
5668                        dynamic=self._FIELDS_DYNAMIC,
5669                        selected=self.op.output_fields)
5670
5671   def ExpandNames(self):
5672     self.needed_locks = {}
5673
5674   def Exec(self, feedback_fn):
5675     """Dump a representation of the cluster config to the standard output.
5676
5677     """
5678     values = []
5679     for field in self.op.output_fields:
5680       if field == "cluster_name":
5681         entry = self.cfg.GetClusterName()
5682       elif field == "master_node":
5683         entry = self.cfg.GetMasterNode()
5684       elif field == "drain_flag":
5685         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5686       elif field == "watcher_pause":
5687         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5688       elif field == "volume_group_name":
5689         entry = self.cfg.GetVGName()
5690       else:
5691         raise errors.ParameterError(field)
5692       values.append(entry)
5693     return values
5694
5695
5696 class LUInstanceActivateDisks(NoHooksLU):
5697   """Bring up an instance's disks.
5698
5699   """
5700   REQ_BGL = False
5701
5702   def ExpandNames(self):
5703     self._ExpandAndLockInstance()
5704     self.needed_locks[locking.LEVEL_NODE] = []
5705     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5706
5707   def DeclareLocks(self, level):
5708     if level == locking.LEVEL_NODE:
5709       self._LockInstancesNodes()
5710
5711   def CheckPrereq(self):
5712     """Check prerequisites.
5713
5714     This checks that the instance is in the cluster.
5715
5716     """
5717     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5718     assert self.instance is not None, \
5719       "Cannot retrieve locked instance %s" % self.op.instance_name
5720     _CheckNodeOnline(self, self.instance.primary_node)
5721
5722   def Exec(self, feedback_fn):
5723     """Activate the disks.
5724
5725     """
5726     disks_ok, disks_info = \
5727               _AssembleInstanceDisks(self, self.instance,
5728                                      ignore_size=self.op.ignore_size)
5729     if not disks_ok:
5730       raise errors.OpExecError("Cannot activate block devices")
5731
5732     return disks_info
5733
5734
5735 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5736                            ignore_size=False):
5737   """Prepare the block devices for an instance.
5738
5739   This sets up the block devices on all nodes.
5740
5741   @type lu: L{LogicalUnit}
5742   @param lu: the logical unit on whose behalf we execute
5743   @type instance: L{objects.Instance}
5744   @param instance: the instance for whose disks we assemble
5745   @type disks: list of L{objects.Disk} or None
5746   @param disks: which disks to assemble (or all, if None)
5747   @type ignore_secondaries: boolean
5748   @param ignore_secondaries: if true, errors on secondary nodes
5749       won't result in an error return from the function
5750   @type ignore_size: boolean
5751   @param ignore_size: if true, the current known size of the disk
5752       will not be used during the disk activation, useful for cases
5753       when the size is wrong
5754   @return: False if the operation failed, otherwise a list of
5755       (host, instance_visible_name, node_visible_name)
5756       with the mapping from node devices to instance devices
5757
5758   """
5759   device_info = []
5760   disks_ok = True
5761   iname = instance.name
5762   disks = _ExpandCheckDisks(instance, disks)
5763
5764   # With the two passes mechanism we try to reduce the window of
5765   # opportunity for the race condition of switching DRBD to primary
5766   # before handshaking occured, but we do not eliminate it
5767
5768   # The proper fix would be to wait (with some limits) until the
5769   # connection has been made and drbd transitions from WFConnection
5770   # into any other network-connected state (Connected, SyncTarget,
5771   # SyncSource, etc.)
5772
5773   # 1st pass, assemble on all nodes in secondary mode
5774   for idx, inst_disk in enumerate(disks):
5775     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5776       if ignore_size:
5777         node_disk = node_disk.Copy()
5778         node_disk.UnsetSize()
5779       lu.cfg.SetDiskID(node_disk, node)
5780       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5781       msg = result.fail_msg
5782       if msg:
5783         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5784                            " (is_primary=False, pass=1): %s",
5785                            inst_disk.iv_name, node, msg)
5786         if not ignore_secondaries:
5787           disks_ok = False
5788
5789   # FIXME: race condition on drbd migration to primary
5790
5791   # 2nd pass, do only the primary node
5792   for idx, inst_disk in enumerate(disks):
5793     dev_path = None
5794
5795     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5796       if node != instance.primary_node:
5797         continue
5798       if ignore_size:
5799         node_disk = node_disk.Copy()
5800         node_disk.UnsetSize()
5801       lu.cfg.SetDiskID(node_disk, node)
5802       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5803       msg = result.fail_msg
5804       if msg:
5805         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5806                            " (is_primary=True, pass=2): %s",
5807                            inst_disk.iv_name, node, msg)
5808         disks_ok = False
5809       else:
5810         dev_path = result.payload
5811
5812     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5813
5814   # leave the disks configured for the primary node
5815   # this is a workaround that would be fixed better by
5816   # improving the logical/physical id handling
5817   for disk in disks:
5818     lu.cfg.SetDiskID(disk, instance.primary_node)
5819
5820   return disks_ok, device_info
5821
5822
5823 def _StartInstanceDisks(lu, instance, force):
5824   """Start the disks of an instance.
5825
5826   """
5827   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5828                                            ignore_secondaries=force)
5829   if not disks_ok:
5830     _ShutdownInstanceDisks(lu, instance)
5831     if force is not None and not force:
5832       lu.proc.LogWarning("", hint="If the message above refers to a"
5833                          " secondary node,"
5834                          " you can retry the operation using '--force'.")
5835     raise errors.OpExecError("Disk consistency error")
5836
5837
5838 class LUInstanceDeactivateDisks(NoHooksLU):
5839   """Shutdown an instance's disks.
5840
5841   """
5842   REQ_BGL = False
5843
5844   def ExpandNames(self):
5845     self._ExpandAndLockInstance()
5846     self.needed_locks[locking.LEVEL_NODE] = []
5847     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5848
5849   def DeclareLocks(self, level):
5850     if level == locking.LEVEL_NODE:
5851       self._LockInstancesNodes()
5852
5853   def CheckPrereq(self):
5854     """Check prerequisites.
5855
5856     This checks that the instance is in the cluster.
5857
5858     """
5859     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5860     assert self.instance is not None, \
5861       "Cannot retrieve locked instance %s" % self.op.instance_name
5862
5863   def Exec(self, feedback_fn):
5864     """Deactivate the disks
5865
5866     """
5867     instance = self.instance
5868     if self.op.force:
5869       _ShutdownInstanceDisks(self, instance)
5870     else:
5871       _SafeShutdownInstanceDisks(self, instance)
5872
5873
5874 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5875   """Shutdown block devices of an instance.
5876
5877   This function checks if an instance is running, before calling
5878   _ShutdownInstanceDisks.
5879
5880   """
5881   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5882   _ShutdownInstanceDisks(lu, instance, disks=disks)
5883
5884
5885 def _ExpandCheckDisks(instance, disks):
5886   """Return the instance disks selected by the disks list
5887
5888   @type disks: list of L{objects.Disk} or None
5889   @param disks: selected disks
5890   @rtype: list of L{objects.Disk}
5891   @return: selected instance disks to act on
5892
5893   """
5894   if disks is None:
5895     return instance.disks
5896   else:
5897     if not set(disks).issubset(instance.disks):
5898       raise errors.ProgrammerError("Can only act on disks belonging to the"
5899                                    " target instance")
5900     return disks
5901
5902
5903 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5904   """Shutdown block devices of an instance.
5905
5906   This does the shutdown on all nodes of the instance.
5907
5908   If the ignore_primary is false, errors on the primary node are
5909   ignored.
5910
5911   """
5912   all_result = True
5913   disks = _ExpandCheckDisks(instance, disks)
5914
5915   for disk in disks:
5916     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5917       lu.cfg.SetDiskID(top_disk, node)
5918       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5919       msg = result.fail_msg
5920       if msg:
5921         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5922                       disk.iv_name, node, msg)
5923         if ((node == instance.primary_node and not ignore_primary) or
5924             (node != instance.primary_node and not result.offline)):
5925           all_result = False
5926   return all_result
5927
5928
5929 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5930   """Checks if a node has enough free memory.
5931
5932   This function check if a given node has the needed amount of free
5933   memory. In case the node has less memory or we cannot get the
5934   information from the node, this function raise an OpPrereqError
5935   exception.
5936
5937   @type lu: C{LogicalUnit}
5938   @param lu: a logical unit from which we get configuration data
5939   @type node: C{str}
5940   @param node: the node to check
5941   @type reason: C{str}
5942   @param reason: string to use in the error message
5943   @type requested: C{int}
5944   @param requested: the amount of memory in MiB to check for
5945   @type hypervisor_name: C{str}
5946   @param hypervisor_name: the hypervisor to ask for memory stats
5947   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5948       we cannot check the node
5949
5950   """
5951   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5952   nodeinfo[node].Raise("Can't get data from node %s" % node,
5953                        prereq=True, ecode=errors.ECODE_ENVIRON)
5954   free_mem = nodeinfo[node].payload.get("memory_free", None)
5955   if not isinstance(free_mem, int):
5956     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5957                                " was '%s'" % (node, free_mem),
5958                                errors.ECODE_ENVIRON)
5959   if requested > free_mem:
5960     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5961                                " needed %s MiB, available %s MiB" %
5962                                (node, reason, requested, free_mem),
5963                                errors.ECODE_NORES)
5964
5965
5966 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5967   """Checks if nodes have enough free disk space in the all VGs.
5968
5969   This function check if all given nodes have the needed amount of
5970   free disk. In case any node has less disk or we cannot get the
5971   information from the node, this function raise an OpPrereqError
5972   exception.
5973
5974   @type lu: C{LogicalUnit}
5975   @param lu: a logical unit from which we get configuration data
5976   @type nodenames: C{list}
5977   @param nodenames: the list of node names to check
5978   @type req_sizes: C{dict}
5979   @param req_sizes: the hash of vg and corresponding amount of disk in
5980       MiB to check for
5981   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5982       or we cannot check the node
5983
5984   """
5985   for vg, req_size in req_sizes.items():
5986     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5987
5988
5989 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5990   """Checks if nodes have enough free disk space in the specified VG.
5991
5992   This function check if all given nodes have the needed amount of
5993   free disk. In case any node has less disk or we cannot get the
5994   information from the node, this function raise an OpPrereqError
5995   exception.
5996
5997   @type lu: C{LogicalUnit}
5998   @param lu: a logical unit from which we get configuration data
5999   @type nodenames: C{list}
6000   @param nodenames: the list of node names to check
6001   @type vg: C{str}
6002   @param vg: the volume group to check
6003   @type requested: C{int}
6004   @param requested: the amount of disk in MiB to check for
6005   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6006       or we cannot check the node
6007
6008   """
6009   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6010   for node in nodenames:
6011     info = nodeinfo[node]
6012     info.Raise("Cannot get current information from node %s" % node,
6013                prereq=True, ecode=errors.ECODE_ENVIRON)
6014     vg_free = info.payload.get("vg_free", None)
6015     if not isinstance(vg_free, int):
6016       raise errors.OpPrereqError("Can't compute free disk space on node"
6017                                  " %s for vg %s, result was '%s'" %
6018                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6019     if requested > vg_free:
6020       raise errors.OpPrereqError("Not enough disk space on target node %s"
6021                                  " vg %s: required %d MiB, available %d MiB" %
6022                                  (node, vg, requested, vg_free),
6023                                  errors.ECODE_NORES)
6024
6025
6026 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6027   """Checks if nodes have enough physical CPUs
6028
6029   This function checks if all given nodes have the needed number of
6030   physical CPUs. In case any node has less CPUs or we cannot get the
6031   information from the node, this function raises an OpPrereqError
6032   exception.
6033
6034   @type lu: C{LogicalUnit}
6035   @param lu: a logical unit from which we get configuration data
6036   @type nodenames: C{list}
6037   @param nodenames: the list of node names to check
6038   @type requested: C{int}
6039   @param requested: the minimum acceptable number of physical CPUs
6040   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6041       or we cannot check the node
6042
6043   """
6044   nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6045   for node in nodenames:
6046     info = nodeinfo[node]
6047     info.Raise("Cannot get current information from node %s" % node,
6048                prereq=True, ecode=errors.ECODE_ENVIRON)
6049     num_cpus = info.payload.get("cpu_total", None)
6050     if not isinstance(num_cpus, int):
6051       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6052                                  " on node %s, result was '%s'" %
6053                                  (node, num_cpus), errors.ECODE_ENVIRON)
6054     if requested > num_cpus:
6055       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6056                                  "required" % (node, num_cpus, requested),
6057                                  errors.ECODE_NORES)
6058
6059
6060 class LUInstanceStartup(LogicalUnit):
6061   """Starts an instance.
6062
6063   """
6064   HPATH = "instance-start"
6065   HTYPE = constants.HTYPE_INSTANCE
6066   REQ_BGL = False
6067
6068   def CheckArguments(self):
6069     # extra beparams
6070     if self.op.beparams:
6071       # fill the beparams dict
6072       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6073
6074   def ExpandNames(self):
6075     self._ExpandAndLockInstance()
6076
6077   def BuildHooksEnv(self):
6078     """Build hooks env.
6079
6080     This runs on master, primary and secondary nodes of the instance.
6081
6082     """
6083     env = {
6084       "FORCE": self.op.force,
6085       }
6086
6087     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6088
6089     return env
6090
6091   def BuildHooksNodes(self):
6092     """Build hooks nodes.
6093
6094     """
6095     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6096     return (nl, nl)
6097
6098   def CheckPrereq(self):
6099     """Check prerequisites.
6100
6101     This checks that the instance is in the cluster.
6102
6103     """
6104     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6105     assert self.instance is not None, \
6106       "Cannot retrieve locked instance %s" % self.op.instance_name
6107
6108     # extra hvparams
6109     if self.op.hvparams:
6110       # check hypervisor parameter syntax (locally)
6111       cluster = self.cfg.GetClusterInfo()
6112       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6113       filled_hvp = cluster.FillHV(instance)
6114       filled_hvp.update(self.op.hvparams)
6115       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6116       hv_type.CheckParameterSyntax(filled_hvp)
6117       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6118
6119     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6120
6121     if self.primary_offline and self.op.ignore_offline_nodes:
6122       self.proc.LogWarning("Ignoring offline primary node")
6123
6124       if self.op.hvparams or self.op.beparams:
6125         self.proc.LogWarning("Overridden parameters are ignored")
6126     else:
6127       _CheckNodeOnline(self, instance.primary_node)
6128
6129       bep = self.cfg.GetClusterInfo().FillBE(instance)
6130
6131       # check bridges existence
6132       _CheckInstanceBridgesExist(self, instance)
6133
6134       remote_info = self.rpc.call_instance_info(instance.primary_node,
6135                                                 instance.name,
6136                                                 instance.hypervisor)
6137       remote_info.Raise("Error checking node %s" % instance.primary_node,
6138                         prereq=True, ecode=errors.ECODE_ENVIRON)
6139       if not remote_info.payload: # not running already
6140         _CheckNodeFreeMemory(self, instance.primary_node,
6141                              "starting instance %s" % instance.name,
6142                              bep[constants.BE_MEMORY], instance.hypervisor)
6143
6144   def Exec(self, feedback_fn):
6145     """Start the instance.
6146
6147     """
6148     instance = self.instance
6149     force = self.op.force
6150
6151     if not self.op.no_remember:
6152       self.cfg.MarkInstanceUp(instance.name)
6153
6154     if self.primary_offline:
6155       assert self.op.ignore_offline_nodes
6156       self.proc.LogInfo("Primary node offline, marked instance as started")
6157     else:
6158       node_current = instance.primary_node
6159
6160       _StartInstanceDisks(self, instance, force)
6161
6162       result = \
6163         self.rpc.call_instance_start(node_current,
6164                                      (instance, self.op.hvparams,
6165                                       self.op.beparams),
6166                                      self.op.startup_paused)
6167       msg = result.fail_msg
6168       if msg:
6169         _ShutdownInstanceDisks(self, instance)
6170         raise errors.OpExecError("Could not start instance: %s" % msg)
6171
6172
6173 class LUInstanceReboot(LogicalUnit):
6174   """Reboot an instance.
6175
6176   """
6177   HPATH = "instance-reboot"
6178   HTYPE = constants.HTYPE_INSTANCE
6179   REQ_BGL = False
6180
6181   def ExpandNames(self):
6182     self._ExpandAndLockInstance()
6183
6184   def BuildHooksEnv(self):
6185     """Build hooks env.
6186
6187     This runs on master, primary and secondary nodes of the instance.
6188
6189     """
6190     env = {
6191       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6192       "REBOOT_TYPE": self.op.reboot_type,
6193       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6194       }
6195
6196     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6197
6198     return env
6199
6200   def BuildHooksNodes(self):
6201     """Build hooks nodes.
6202
6203     """
6204     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6205     return (nl, nl)
6206
6207   def CheckPrereq(self):
6208     """Check prerequisites.
6209
6210     This checks that the instance is in the cluster.
6211
6212     """
6213     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6214     assert self.instance is not None, \
6215       "Cannot retrieve locked instance %s" % self.op.instance_name
6216
6217     _CheckNodeOnline(self, instance.primary_node)
6218
6219     # check bridges existence
6220     _CheckInstanceBridgesExist(self, instance)
6221
6222   def Exec(self, feedback_fn):
6223     """Reboot the instance.
6224
6225     """
6226     instance = self.instance
6227     ignore_secondaries = self.op.ignore_secondaries
6228     reboot_type = self.op.reboot_type
6229
6230     remote_info = self.rpc.call_instance_info(instance.primary_node,
6231                                               instance.name,
6232                                               instance.hypervisor)
6233     remote_info.Raise("Error checking node %s" % instance.primary_node)
6234     instance_running = bool(remote_info.payload)
6235
6236     node_current = instance.primary_node
6237
6238     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6239                                             constants.INSTANCE_REBOOT_HARD]:
6240       for disk in instance.disks:
6241         self.cfg.SetDiskID(disk, node_current)
6242       result = self.rpc.call_instance_reboot(node_current, instance,
6243                                              reboot_type,
6244                                              self.op.shutdown_timeout)
6245       result.Raise("Could not reboot instance")
6246     else:
6247       if instance_running:
6248         result = self.rpc.call_instance_shutdown(node_current, instance,
6249                                                  self.op.shutdown_timeout)
6250         result.Raise("Could not shutdown instance for full reboot")
6251         _ShutdownInstanceDisks(self, instance)
6252       else:
6253         self.LogInfo("Instance %s was already stopped, starting now",
6254                      instance.name)
6255       _StartInstanceDisks(self, instance, ignore_secondaries)
6256       result = self.rpc.call_instance_start(node_current,
6257                                             (instance, None, None), False)
6258       msg = result.fail_msg
6259       if msg:
6260         _ShutdownInstanceDisks(self, instance)
6261         raise errors.OpExecError("Could not start instance for"
6262                                  " full reboot: %s" % msg)
6263
6264     self.cfg.MarkInstanceUp(instance.name)
6265
6266
6267 class LUInstanceShutdown(LogicalUnit):
6268   """Shutdown an instance.
6269
6270   """
6271   HPATH = "instance-stop"
6272   HTYPE = constants.HTYPE_INSTANCE
6273   REQ_BGL = False
6274
6275   def ExpandNames(self):
6276     self._ExpandAndLockInstance()
6277
6278   def BuildHooksEnv(self):
6279     """Build hooks env.
6280
6281     This runs on master, primary and secondary nodes of the instance.
6282
6283     """
6284     env = _BuildInstanceHookEnvByObject(self, self.instance)
6285     env["TIMEOUT"] = self.op.timeout
6286     return env
6287
6288   def BuildHooksNodes(self):
6289     """Build hooks nodes.
6290
6291     """
6292     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6293     return (nl, nl)
6294
6295   def CheckPrereq(self):
6296     """Check prerequisites.
6297
6298     This checks that the instance is in the cluster.
6299
6300     """
6301     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6302     assert self.instance is not None, \
6303       "Cannot retrieve locked instance %s" % self.op.instance_name
6304
6305     self.primary_offline = \
6306       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6307
6308     if self.primary_offline and self.op.ignore_offline_nodes:
6309       self.proc.LogWarning("Ignoring offline primary node")
6310     else:
6311       _CheckNodeOnline(self, self.instance.primary_node)
6312
6313   def Exec(self, feedback_fn):
6314     """Shutdown the instance.
6315
6316     """
6317     instance = self.instance
6318     node_current = instance.primary_node
6319     timeout = self.op.timeout
6320
6321     if not self.op.no_remember:
6322       self.cfg.MarkInstanceDown(instance.name)
6323
6324     if self.primary_offline:
6325       assert self.op.ignore_offline_nodes
6326       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6327     else:
6328       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6329       msg = result.fail_msg
6330       if msg:
6331         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6332
6333       _ShutdownInstanceDisks(self, instance)
6334
6335
6336 class LUInstanceReinstall(LogicalUnit):
6337   """Reinstall an instance.
6338
6339   """
6340   HPATH = "instance-reinstall"
6341   HTYPE = constants.HTYPE_INSTANCE
6342   REQ_BGL = False
6343
6344   def ExpandNames(self):
6345     self._ExpandAndLockInstance()
6346
6347   def BuildHooksEnv(self):
6348     """Build hooks env.
6349
6350     This runs on master, primary and secondary nodes of the instance.
6351
6352     """
6353     return _BuildInstanceHookEnvByObject(self, self.instance)
6354
6355   def BuildHooksNodes(self):
6356     """Build hooks nodes.
6357
6358     """
6359     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6360     return (nl, nl)
6361
6362   def CheckPrereq(self):
6363     """Check prerequisites.
6364
6365     This checks that the instance is in the cluster and is not running.
6366
6367     """
6368     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6369     assert instance is not None, \
6370       "Cannot retrieve locked instance %s" % self.op.instance_name
6371     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6372                      " offline, cannot reinstall")
6373     for node in instance.secondary_nodes:
6374       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6375                        " cannot reinstall")
6376
6377     if instance.disk_template == constants.DT_DISKLESS:
6378       raise errors.OpPrereqError("Instance '%s' has no disks" %
6379                                  self.op.instance_name,
6380                                  errors.ECODE_INVAL)
6381     _CheckInstanceDown(self, instance, "cannot reinstall")
6382
6383     if self.op.os_type is not None:
6384       # OS verification
6385       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6386       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6387       instance_os = self.op.os_type
6388     else:
6389       instance_os = instance.os
6390
6391     nodelist = list(instance.all_nodes)
6392
6393     if self.op.osparams:
6394       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6395       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6396       self.os_inst = i_osdict # the new dict (without defaults)
6397     else:
6398       self.os_inst = None
6399
6400     self.instance = instance
6401
6402   def Exec(self, feedback_fn):
6403     """Reinstall the instance.
6404
6405     """
6406     inst = self.instance
6407
6408     if self.op.os_type is not None:
6409       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6410       inst.os = self.op.os_type
6411       # Write to configuration
6412       self.cfg.Update(inst, feedback_fn)
6413
6414     _StartInstanceDisks(self, inst, None)
6415     try:
6416       feedback_fn("Running the instance OS create scripts...")
6417       # FIXME: pass debug option from opcode to backend
6418       result = self.rpc.call_instance_os_add(inst.primary_node,
6419                                              (inst, self.os_inst), True,
6420                                              self.op.debug_level)
6421       result.Raise("Could not install OS for instance %s on node %s" %
6422                    (inst.name, inst.primary_node))
6423     finally:
6424       _ShutdownInstanceDisks(self, inst)
6425
6426
6427 class LUInstanceRecreateDisks(LogicalUnit):
6428   """Recreate an instance's missing disks.
6429
6430   """
6431   HPATH = "instance-recreate-disks"
6432   HTYPE = constants.HTYPE_INSTANCE
6433   REQ_BGL = False
6434
6435   def CheckArguments(self):
6436     # normalise the disk list
6437     self.op.disks = sorted(frozenset(self.op.disks))
6438
6439   def ExpandNames(self):
6440     self._ExpandAndLockInstance()
6441     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6442     if self.op.nodes:
6443       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6444       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6445     else:
6446       self.needed_locks[locking.LEVEL_NODE] = []
6447
6448   def DeclareLocks(self, level):
6449     if level == locking.LEVEL_NODE:
6450       # if we replace the nodes, we only need to lock the old primary,
6451       # otherwise we need to lock all nodes for disk re-creation
6452       primary_only = bool(self.op.nodes)
6453       self._LockInstancesNodes(primary_only=primary_only)
6454
6455   def BuildHooksEnv(self):
6456     """Build hooks env.
6457
6458     This runs on master, primary and secondary nodes of the instance.
6459
6460     """
6461     return _BuildInstanceHookEnvByObject(self, self.instance)
6462
6463   def BuildHooksNodes(self):
6464     """Build hooks nodes.
6465
6466     """
6467     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6468     return (nl, nl)
6469
6470   def CheckPrereq(self):
6471     """Check prerequisites.
6472
6473     This checks that the instance is in the cluster and is not running.
6474
6475     """
6476     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6477     assert instance is not None, \
6478       "Cannot retrieve locked instance %s" % self.op.instance_name
6479     if self.op.nodes:
6480       if len(self.op.nodes) != len(instance.all_nodes):
6481         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6482                                    " %d replacement nodes were specified" %
6483                                    (instance.name, len(instance.all_nodes),
6484                                     len(self.op.nodes)),
6485                                    errors.ECODE_INVAL)
6486       assert instance.disk_template != constants.DT_DRBD8 or \
6487           len(self.op.nodes) == 2
6488       assert instance.disk_template != constants.DT_PLAIN or \
6489           len(self.op.nodes) == 1
6490       primary_node = self.op.nodes[0]
6491     else:
6492       primary_node = instance.primary_node
6493     _CheckNodeOnline(self, primary_node)
6494
6495     if instance.disk_template == constants.DT_DISKLESS:
6496       raise errors.OpPrereqError("Instance '%s' has no disks" %
6497                                  self.op.instance_name, errors.ECODE_INVAL)
6498     # if we replace nodes *and* the old primary is offline, we don't
6499     # check
6500     assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6501     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6502     if not (self.op.nodes and old_pnode.offline):
6503       _CheckInstanceDown(self, instance, "cannot recreate disks")
6504
6505     if not self.op.disks:
6506       self.op.disks = range(len(instance.disks))
6507     else:
6508       for idx in self.op.disks:
6509         if idx >= len(instance.disks):
6510           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6511                                      errors.ECODE_INVAL)
6512     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6513       raise errors.OpPrereqError("Can't recreate disks partially and"
6514                                  " change the nodes at the same time",
6515                                  errors.ECODE_INVAL)
6516     self.instance = instance
6517
6518   def Exec(self, feedback_fn):
6519     """Recreate the disks.
6520
6521     """
6522     instance = self.instance
6523
6524     to_skip = []
6525     mods = [] # keeps track of needed logical_id changes
6526
6527     for idx, disk in enumerate(instance.disks):
6528       if idx not in self.op.disks: # disk idx has not been passed in
6529         to_skip.append(idx)
6530         continue
6531       # update secondaries for disks, if needed
6532       if self.op.nodes:
6533         if disk.dev_type == constants.LD_DRBD8:
6534           # need to update the nodes and minors
6535           assert len(self.op.nodes) == 2
6536           assert len(disk.logical_id) == 6 # otherwise disk internals
6537                                            # have changed
6538           (_, _, old_port, _, _, old_secret) = disk.logical_id
6539           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6540           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6541                     new_minors[0], new_minors[1], old_secret)
6542           assert len(disk.logical_id) == len(new_id)
6543           mods.append((idx, new_id))
6544
6545     # now that we have passed all asserts above, we can apply the mods
6546     # in a single run (to avoid partial changes)
6547     for idx, new_id in mods:
6548       instance.disks[idx].logical_id = new_id
6549
6550     # change primary node, if needed
6551     if self.op.nodes:
6552       instance.primary_node = self.op.nodes[0]
6553       self.LogWarning("Changing the instance's nodes, you will have to"
6554                       " remove any disks left on the older nodes manually")
6555
6556     if self.op.nodes:
6557       self.cfg.Update(instance, feedback_fn)
6558
6559     _CreateDisks(self, instance, to_skip=to_skip)
6560
6561
6562 class LUInstanceRename(LogicalUnit):
6563   """Rename an instance.
6564
6565   """
6566   HPATH = "instance-rename"
6567   HTYPE = constants.HTYPE_INSTANCE
6568
6569   def CheckArguments(self):
6570     """Check arguments.
6571
6572     """
6573     if self.op.ip_check and not self.op.name_check:
6574       # TODO: make the ip check more flexible and not depend on the name check
6575       raise errors.OpPrereqError("IP address check requires a name check",
6576                                  errors.ECODE_INVAL)
6577
6578   def BuildHooksEnv(self):
6579     """Build hooks env.
6580
6581     This runs on master, primary and secondary nodes of the instance.
6582
6583     """
6584     env = _BuildInstanceHookEnvByObject(self, self.instance)
6585     env["INSTANCE_NEW_NAME"] = self.op.new_name
6586     return env
6587
6588   def BuildHooksNodes(self):
6589     """Build hooks nodes.
6590
6591     """
6592     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6593     return (nl, nl)
6594
6595   def CheckPrereq(self):
6596     """Check prerequisites.
6597
6598     This checks that the instance is in the cluster and is not running.
6599
6600     """
6601     self.op.instance_name = _ExpandInstanceName(self.cfg,
6602                                                 self.op.instance_name)
6603     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6604     assert instance is not None
6605     _CheckNodeOnline(self, instance.primary_node)
6606     _CheckInstanceDown(self, instance, "cannot rename")
6607     self.instance = instance
6608
6609     new_name = self.op.new_name
6610     if self.op.name_check:
6611       hostname = netutils.GetHostname(name=new_name)
6612       if hostname != new_name:
6613         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6614                      hostname.name)
6615       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6616         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6617                                     " same as given hostname '%s'") %
6618                                     (hostname.name, self.op.new_name),
6619                                     errors.ECODE_INVAL)
6620       new_name = self.op.new_name = hostname.name
6621       if (self.op.ip_check and
6622           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6623         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6624                                    (hostname.ip, new_name),
6625                                    errors.ECODE_NOTUNIQUE)
6626
6627     instance_list = self.cfg.GetInstanceList()
6628     if new_name in instance_list and new_name != instance.name:
6629       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6630                                  new_name, errors.ECODE_EXISTS)
6631
6632   def Exec(self, feedback_fn):
6633     """Rename the instance.
6634
6635     """
6636     inst = self.instance
6637     old_name = inst.name
6638
6639     rename_file_storage = False
6640     if (inst.disk_template in constants.DTS_FILEBASED and
6641         self.op.new_name != inst.name):
6642       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6643       rename_file_storage = True
6644
6645     self.cfg.RenameInstance(inst.name, self.op.new_name)
6646     # Change the instance lock. This is definitely safe while we hold the BGL.
6647     # Otherwise the new lock would have to be added in acquired mode.
6648     assert self.REQ_BGL
6649     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6650     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6651
6652     # re-read the instance from the configuration after rename
6653     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6654
6655     if rename_file_storage:
6656       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6657       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6658                                                      old_file_storage_dir,
6659                                                      new_file_storage_dir)
6660       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6661                    " (but the instance has been renamed in Ganeti)" %
6662                    (inst.primary_node, old_file_storage_dir,
6663                     new_file_storage_dir))
6664
6665     _StartInstanceDisks(self, inst, None)
6666     try:
6667       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6668                                                  old_name, self.op.debug_level)
6669       msg = result.fail_msg
6670       if msg:
6671         msg = ("Could not run OS rename script for instance %s on node %s"
6672                " (but the instance has been renamed in Ganeti): %s" %
6673                (inst.name, inst.primary_node, msg))
6674         self.proc.LogWarning(msg)
6675     finally:
6676       _ShutdownInstanceDisks(self, inst)
6677
6678     return inst.name
6679
6680
6681 class LUInstanceRemove(LogicalUnit):
6682   """Remove an instance.
6683
6684   """
6685   HPATH = "instance-remove"
6686   HTYPE = constants.HTYPE_INSTANCE
6687   REQ_BGL = False
6688
6689   def ExpandNames(self):
6690     self._ExpandAndLockInstance()
6691     self.needed_locks[locking.LEVEL_NODE] = []
6692     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6693
6694   def DeclareLocks(self, level):
6695     if level == locking.LEVEL_NODE:
6696       self._LockInstancesNodes()
6697
6698   def BuildHooksEnv(self):
6699     """Build hooks env.
6700
6701     This runs on master, primary and secondary nodes of the instance.
6702
6703     """
6704     env = _BuildInstanceHookEnvByObject(self, self.instance)
6705     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6706     return env
6707
6708   def BuildHooksNodes(self):
6709     """Build hooks nodes.
6710
6711     """
6712     nl = [self.cfg.GetMasterNode()]
6713     nl_post = list(self.instance.all_nodes) + nl
6714     return (nl, nl_post)
6715
6716   def CheckPrereq(self):
6717     """Check prerequisites.
6718
6719     This checks that the instance is in the cluster.
6720
6721     """
6722     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6723     assert self.instance is not None, \
6724       "Cannot retrieve locked instance %s" % self.op.instance_name
6725
6726   def Exec(self, feedback_fn):
6727     """Remove the instance.
6728
6729     """
6730     instance = self.instance
6731     logging.info("Shutting down instance %s on node %s",
6732                  instance.name, instance.primary_node)
6733
6734     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6735                                              self.op.shutdown_timeout)
6736     msg = result.fail_msg
6737     if msg:
6738       if self.op.ignore_failures:
6739         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6740       else:
6741         raise errors.OpExecError("Could not shutdown instance %s on"
6742                                  " node %s: %s" %
6743                                  (instance.name, instance.primary_node, msg))
6744
6745     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6746
6747
6748 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6749   """Utility function to remove an instance.
6750
6751   """
6752   logging.info("Removing block devices for instance %s", instance.name)
6753
6754   if not _RemoveDisks(lu, instance):
6755     if not ignore_failures:
6756       raise errors.OpExecError("Can't remove instance's disks")
6757     feedback_fn("Warning: can't remove instance's disks")
6758
6759   logging.info("Removing instance %s out of cluster config", instance.name)
6760
6761   lu.cfg.RemoveInstance(instance.name)
6762
6763   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6764     "Instance lock removal conflict"
6765
6766   # Remove lock for the instance
6767   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6768
6769
6770 class LUInstanceQuery(NoHooksLU):
6771   """Logical unit for querying instances.
6772
6773   """
6774   # pylint: disable=W0142
6775   REQ_BGL = False
6776
6777   def CheckArguments(self):
6778     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6779                              self.op.output_fields, self.op.use_locking)
6780
6781   def ExpandNames(self):
6782     self.iq.ExpandNames(self)
6783
6784   def DeclareLocks(self, level):
6785     self.iq.DeclareLocks(self, level)
6786
6787   def Exec(self, feedback_fn):
6788     return self.iq.OldStyleQuery(self)
6789
6790
6791 class LUInstanceFailover(LogicalUnit):
6792   """Failover an instance.
6793
6794   """
6795   HPATH = "instance-failover"
6796   HTYPE = constants.HTYPE_INSTANCE
6797   REQ_BGL = False
6798
6799   def CheckArguments(self):
6800     """Check the arguments.
6801
6802     """
6803     self.iallocator = getattr(self.op, "iallocator", None)
6804     self.target_node = getattr(self.op, "target_node", None)
6805
6806   def ExpandNames(self):
6807     self._ExpandAndLockInstance()
6808
6809     if self.op.target_node is not None:
6810       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6811
6812     self.needed_locks[locking.LEVEL_NODE] = []
6813     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6814
6815     ignore_consistency = self.op.ignore_consistency
6816     shutdown_timeout = self.op.shutdown_timeout
6817     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6818                                        cleanup=False,
6819                                        failover=True,
6820                                        ignore_consistency=ignore_consistency,
6821                                        shutdown_timeout=shutdown_timeout)
6822     self.tasklets = [self._migrater]
6823
6824   def DeclareLocks(self, level):
6825     if level == locking.LEVEL_NODE:
6826       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6827       if instance.disk_template in constants.DTS_EXT_MIRROR:
6828         if self.op.target_node is None:
6829           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6830         else:
6831           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6832                                                    self.op.target_node]
6833         del self.recalculate_locks[locking.LEVEL_NODE]
6834       else:
6835         self._LockInstancesNodes()
6836
6837   def BuildHooksEnv(self):
6838     """Build hooks env.
6839
6840     This runs on master, primary and secondary nodes of the instance.
6841
6842     """
6843     instance = self._migrater.instance
6844     source_node = instance.primary_node
6845     target_node = self.op.target_node
6846     env = {
6847       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6848       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6849       "OLD_PRIMARY": source_node,
6850       "NEW_PRIMARY": target_node,
6851       }
6852
6853     if instance.disk_template in constants.DTS_INT_MIRROR:
6854       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6855       env["NEW_SECONDARY"] = source_node
6856     else:
6857       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6858
6859     env.update(_BuildInstanceHookEnvByObject(self, instance))
6860
6861     return env
6862
6863   def BuildHooksNodes(self):
6864     """Build hooks nodes.
6865
6866     """
6867     instance = self._migrater.instance
6868     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6869     return (nl, nl + [instance.primary_node])
6870
6871
6872 class LUInstanceMigrate(LogicalUnit):
6873   """Migrate an instance.
6874
6875   This is migration without shutting down, compared to the failover,
6876   which is done with shutdown.
6877
6878   """
6879   HPATH = "instance-migrate"
6880   HTYPE = constants.HTYPE_INSTANCE
6881   REQ_BGL = False
6882
6883   def ExpandNames(self):
6884     self._ExpandAndLockInstance()
6885
6886     if self.op.target_node is not None:
6887       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6888
6889     self.needed_locks[locking.LEVEL_NODE] = []
6890     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6891
6892     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6893                                        cleanup=self.op.cleanup,
6894                                        failover=False,
6895                                        fallback=self.op.allow_failover)
6896     self.tasklets = [self._migrater]
6897
6898   def DeclareLocks(self, level):
6899     if level == locking.LEVEL_NODE:
6900       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6901       if instance.disk_template in constants.DTS_EXT_MIRROR:
6902         if self.op.target_node is None:
6903           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6904         else:
6905           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6906                                                    self.op.target_node]
6907         del self.recalculate_locks[locking.LEVEL_NODE]
6908       else:
6909         self._LockInstancesNodes()
6910
6911   def BuildHooksEnv(self):
6912     """Build hooks env.
6913
6914     This runs on master, primary and secondary nodes of the instance.
6915
6916     """
6917     instance = self._migrater.instance
6918     source_node = instance.primary_node
6919     target_node = self.op.target_node
6920     env = _BuildInstanceHookEnvByObject(self, instance)
6921     env.update({
6922       "MIGRATE_LIVE": self._migrater.live,
6923       "MIGRATE_CLEANUP": self.op.cleanup,
6924       "OLD_PRIMARY": source_node,
6925       "NEW_PRIMARY": target_node,
6926       })
6927
6928     if instance.disk_template in constants.DTS_INT_MIRROR:
6929       env["OLD_SECONDARY"] = target_node
6930       env["NEW_SECONDARY"] = source_node
6931     else:
6932       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6933
6934     return env
6935
6936   def BuildHooksNodes(self):
6937     """Build hooks nodes.
6938
6939     """
6940     instance = self._migrater.instance
6941     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6942     return (nl, nl + [instance.primary_node])
6943
6944
6945 class LUInstanceMove(LogicalUnit):
6946   """Move an instance by data-copying.
6947
6948   """
6949   HPATH = "instance-move"
6950   HTYPE = constants.HTYPE_INSTANCE
6951   REQ_BGL = False
6952
6953   def ExpandNames(self):
6954     self._ExpandAndLockInstance()
6955     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6956     self.op.target_node = target_node
6957     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6958     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6959
6960   def DeclareLocks(self, level):
6961     if level == locking.LEVEL_NODE:
6962       self._LockInstancesNodes(primary_only=True)
6963
6964   def BuildHooksEnv(self):
6965     """Build hooks env.
6966
6967     This runs on master, primary and secondary nodes of the instance.
6968
6969     """
6970     env = {
6971       "TARGET_NODE": self.op.target_node,
6972       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6973       }
6974     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6975     return env
6976
6977   def BuildHooksNodes(self):
6978     """Build hooks nodes.
6979
6980     """
6981     nl = [
6982       self.cfg.GetMasterNode(),
6983       self.instance.primary_node,
6984       self.op.target_node,
6985       ]
6986     return (nl, nl)
6987
6988   def CheckPrereq(self):
6989     """Check prerequisites.
6990
6991     This checks that the instance is in the cluster.
6992
6993     """
6994     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6995     assert self.instance is not None, \
6996       "Cannot retrieve locked instance %s" % self.op.instance_name
6997
6998     node = self.cfg.GetNodeInfo(self.op.target_node)
6999     assert node is not None, \
7000       "Cannot retrieve locked node %s" % self.op.target_node
7001
7002     self.target_node = target_node = node.name
7003
7004     if target_node == instance.primary_node:
7005       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7006                                  (instance.name, target_node),
7007                                  errors.ECODE_STATE)
7008
7009     bep = self.cfg.GetClusterInfo().FillBE(instance)
7010
7011     for idx, dsk in enumerate(instance.disks):
7012       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7013         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7014                                    " cannot copy" % idx, errors.ECODE_STATE)
7015
7016     _CheckNodeOnline(self, target_node)
7017     _CheckNodeNotDrained(self, target_node)
7018     _CheckNodeVmCapable(self, target_node)
7019
7020     if instance.admin_up:
7021       # check memory requirements on the secondary node
7022       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7023                            instance.name, bep[constants.BE_MEMORY],
7024                            instance.hypervisor)
7025     else:
7026       self.LogInfo("Not checking memory on the secondary node as"
7027                    " instance will not be started")
7028
7029     # check bridge existance
7030     _CheckInstanceBridgesExist(self, instance, node=target_node)
7031
7032   def Exec(self, feedback_fn):
7033     """Move an instance.
7034
7035     The move is done by shutting it down on its present node, copying
7036     the data over (slow) and starting it on the new node.
7037
7038     """
7039     instance = self.instance
7040
7041     source_node = instance.primary_node
7042     target_node = self.target_node
7043
7044     self.LogInfo("Shutting down instance %s on source node %s",
7045                  instance.name, source_node)
7046
7047     result = self.rpc.call_instance_shutdown(source_node, instance,
7048                                              self.op.shutdown_timeout)
7049     msg = result.fail_msg
7050     if msg:
7051       if self.op.ignore_consistency:
7052         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7053                              " Proceeding anyway. Please make sure node"
7054                              " %s is down. Error details: %s",
7055                              instance.name, source_node, source_node, msg)
7056       else:
7057         raise errors.OpExecError("Could not shutdown instance %s on"
7058                                  " node %s: %s" %
7059                                  (instance.name, source_node, msg))
7060
7061     # create the target disks
7062     try:
7063       _CreateDisks(self, instance, target_node=target_node)
7064     except errors.OpExecError:
7065       self.LogWarning("Device creation failed, reverting...")
7066       try:
7067         _RemoveDisks(self, instance, target_node=target_node)
7068       finally:
7069         self.cfg.ReleaseDRBDMinors(instance.name)
7070         raise
7071
7072     cluster_name = self.cfg.GetClusterInfo().cluster_name
7073
7074     errs = []
7075     # activate, get path, copy the data over
7076     for idx, disk in enumerate(instance.disks):
7077       self.LogInfo("Copying data for disk %d", idx)
7078       result = self.rpc.call_blockdev_assemble(target_node, disk,
7079                                                instance.name, True, idx)
7080       if result.fail_msg:
7081         self.LogWarning("Can't assemble newly created disk %d: %s",
7082                         idx, result.fail_msg)
7083         errs.append(result.fail_msg)
7084         break
7085       dev_path = result.payload
7086       result = self.rpc.call_blockdev_export(source_node, disk,
7087                                              target_node, dev_path,
7088                                              cluster_name)
7089       if result.fail_msg:
7090         self.LogWarning("Can't copy data over for disk %d: %s",
7091                         idx, result.fail_msg)
7092         errs.append(result.fail_msg)
7093         break
7094
7095     if errs:
7096       self.LogWarning("Some disks failed to copy, aborting")
7097       try:
7098         _RemoveDisks(self, instance, target_node=target_node)
7099       finally:
7100         self.cfg.ReleaseDRBDMinors(instance.name)
7101         raise errors.OpExecError("Errors during disk copy: %s" %
7102                                  (",".join(errs),))
7103
7104     instance.primary_node = target_node
7105     self.cfg.Update(instance, feedback_fn)
7106
7107     self.LogInfo("Removing the disks on the original node")
7108     _RemoveDisks(self, instance, target_node=source_node)
7109
7110     # Only start the instance if it's marked as up
7111     if instance.admin_up:
7112       self.LogInfo("Starting instance %s on node %s",
7113                    instance.name, target_node)
7114
7115       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7116                                            ignore_secondaries=True)
7117       if not disks_ok:
7118         _ShutdownInstanceDisks(self, instance)
7119         raise errors.OpExecError("Can't activate the instance's disks")
7120
7121       result = self.rpc.call_instance_start(target_node,
7122                                             (instance, None, None), False)
7123       msg = result.fail_msg
7124       if msg:
7125         _ShutdownInstanceDisks(self, instance)
7126         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7127                                  (instance.name, target_node, msg))
7128
7129
7130 class LUNodeMigrate(LogicalUnit):
7131   """Migrate all instances from a node.
7132
7133   """
7134   HPATH = "node-migrate"
7135   HTYPE = constants.HTYPE_NODE
7136   REQ_BGL = False
7137
7138   def CheckArguments(self):
7139     pass
7140
7141   def ExpandNames(self):
7142     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7143
7144     self.share_locks = _ShareAll()
7145     self.needed_locks = {
7146       locking.LEVEL_NODE: [self.op.node_name],
7147       }
7148
7149   def BuildHooksEnv(self):
7150     """Build hooks env.
7151
7152     This runs on the master, the primary and all the secondaries.
7153
7154     """
7155     return {
7156       "NODE_NAME": self.op.node_name,
7157       }
7158
7159   def BuildHooksNodes(self):
7160     """Build hooks nodes.
7161
7162     """
7163     nl = [self.cfg.GetMasterNode()]
7164     return (nl, nl)
7165
7166   def CheckPrereq(self):
7167     pass
7168
7169   def Exec(self, feedback_fn):
7170     # Prepare jobs for migration instances
7171     jobs = [
7172       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7173                                  mode=self.op.mode,
7174                                  live=self.op.live,
7175                                  iallocator=self.op.iallocator,
7176                                  target_node=self.op.target_node)]
7177       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7178       ]
7179
7180     # TODO: Run iallocator in this opcode and pass correct placement options to
7181     # OpInstanceMigrate. Since other jobs can modify the cluster between
7182     # running the iallocator and the actual migration, a good consistency model
7183     # will have to be found.
7184
7185     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7186             frozenset([self.op.node_name]))
7187
7188     return ResultWithJobs(jobs)
7189
7190
7191 class TLMigrateInstance(Tasklet):
7192   """Tasklet class for instance migration.
7193
7194   @type live: boolean
7195   @ivar live: whether the migration will be done live or non-live;
7196       this variable is initalized only after CheckPrereq has run
7197   @type cleanup: boolean
7198   @ivar cleanup: Wheater we cleanup from a failed migration
7199   @type iallocator: string
7200   @ivar iallocator: The iallocator used to determine target_node
7201   @type target_node: string
7202   @ivar target_node: If given, the target_node to reallocate the instance to
7203   @type failover: boolean
7204   @ivar failover: Whether operation results in failover or migration
7205   @type fallback: boolean
7206   @ivar fallback: Whether fallback to failover is allowed if migration not
7207                   possible
7208   @type ignore_consistency: boolean
7209   @ivar ignore_consistency: Wheter we should ignore consistency between source
7210                             and target node
7211   @type shutdown_timeout: int
7212   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7213
7214   """
7215
7216   # Constants
7217   _MIGRATION_POLL_INTERVAL = 1      # seconds
7218   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7219
7220   def __init__(self, lu, instance_name, cleanup=False,
7221                failover=False, fallback=False,
7222                ignore_consistency=False,
7223                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7224     """Initializes this class.
7225
7226     """
7227     Tasklet.__init__(self, lu)
7228
7229     # Parameters
7230     self.instance_name = instance_name
7231     self.cleanup = cleanup
7232     self.live = False # will be overridden later
7233     self.failover = failover
7234     self.fallback = fallback
7235     self.ignore_consistency = ignore_consistency
7236     self.shutdown_timeout = shutdown_timeout
7237
7238   def CheckPrereq(self):
7239     """Check prerequisites.
7240
7241     This checks that the instance is in the cluster.
7242
7243     """
7244     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7245     instance = self.cfg.GetInstanceInfo(instance_name)
7246     assert instance is not None
7247     self.instance = instance
7248
7249     if (not self.cleanup and not instance.admin_up and not self.failover and
7250         self.fallback):
7251       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7252                       " to failover")
7253       self.failover = True
7254
7255     if instance.disk_template not in constants.DTS_MIRRORED:
7256       if self.failover:
7257         text = "failovers"
7258       else:
7259         text = "migrations"
7260       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7261                                  " %s" % (instance.disk_template, text),
7262                                  errors.ECODE_STATE)
7263
7264     if instance.disk_template in constants.DTS_EXT_MIRROR:
7265       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7266
7267       if self.lu.op.iallocator:
7268         self._RunAllocator()
7269       else:
7270         # We set set self.target_node as it is required by
7271         # BuildHooksEnv
7272         self.target_node = self.lu.op.target_node
7273
7274       # self.target_node is already populated, either directly or by the
7275       # iallocator run
7276       target_node = self.target_node
7277       if self.target_node == instance.primary_node:
7278         raise errors.OpPrereqError("Cannot migrate instance %s"
7279                                    " to its primary (%s)" %
7280                                    (instance.name, instance.primary_node))
7281
7282       if len(self.lu.tasklets) == 1:
7283         # It is safe to release locks only when we're the only tasklet
7284         # in the LU
7285         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7286                       keep=[instance.primary_node, self.target_node])
7287
7288     else:
7289       secondary_nodes = instance.secondary_nodes
7290       if not secondary_nodes:
7291         raise errors.ConfigurationError("No secondary node but using"
7292                                         " %s disk template" %
7293                                         instance.disk_template)
7294       target_node = secondary_nodes[0]
7295       if self.lu.op.iallocator or (self.lu.op.target_node and
7296                                    self.lu.op.target_node != target_node):
7297         if self.failover:
7298           text = "failed over"
7299         else:
7300           text = "migrated"
7301         raise errors.OpPrereqError("Instances with disk template %s cannot"
7302                                    " be %s to arbitrary nodes"
7303                                    " (neither an iallocator nor a target"
7304                                    " node can be passed)" %
7305                                    (instance.disk_template, text),
7306                                    errors.ECODE_INVAL)
7307
7308     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7309
7310     # check memory requirements on the secondary node
7311     if not self.failover or instance.admin_up:
7312       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7313                            instance.name, i_be[constants.BE_MEMORY],
7314                            instance.hypervisor)
7315     else:
7316       self.lu.LogInfo("Not checking memory on the secondary node as"
7317                       " instance will not be started")
7318
7319     # check bridge existance
7320     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7321
7322     if not self.cleanup:
7323       _CheckNodeNotDrained(self.lu, target_node)
7324       if not self.failover:
7325         result = self.rpc.call_instance_migratable(instance.primary_node,
7326                                                    instance)
7327         if result.fail_msg and self.fallback:
7328           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7329                           " failover")
7330           self.failover = True
7331         else:
7332           result.Raise("Can't migrate, please use failover",
7333                        prereq=True, ecode=errors.ECODE_STATE)
7334
7335     assert not (self.failover and self.cleanup)
7336
7337     if not self.failover:
7338       if self.lu.op.live is not None and self.lu.op.mode is not None:
7339         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7340                                    " parameters are accepted",
7341                                    errors.ECODE_INVAL)
7342       if self.lu.op.live is not None:
7343         if self.lu.op.live:
7344           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7345         else:
7346           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7347         # reset the 'live' parameter to None so that repeated
7348         # invocations of CheckPrereq do not raise an exception
7349         self.lu.op.live = None
7350       elif self.lu.op.mode is None:
7351         # read the default value from the hypervisor
7352         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7353                                                 skip_globals=False)
7354         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7355
7356       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7357     else:
7358       # Failover is never live
7359       self.live = False
7360
7361   def _RunAllocator(self):
7362     """Run the allocator based on input opcode.
7363
7364     """
7365     ial = IAllocator(self.cfg, self.rpc,
7366                      mode=constants.IALLOCATOR_MODE_RELOC,
7367                      name=self.instance_name,
7368                      # TODO See why hail breaks with a single node below
7369                      relocate_from=[self.instance.primary_node,
7370                                     self.instance.primary_node],
7371                      )
7372
7373     ial.Run(self.lu.op.iallocator)
7374
7375     if not ial.success:
7376       raise errors.OpPrereqError("Can't compute nodes using"
7377                                  " iallocator '%s': %s" %
7378                                  (self.lu.op.iallocator, ial.info),
7379                                  errors.ECODE_NORES)
7380     if len(ial.result) != ial.required_nodes:
7381       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7382                                  " of nodes (%s), required %s" %
7383                                  (self.lu.op.iallocator, len(ial.result),
7384                                   ial.required_nodes), errors.ECODE_FAULT)
7385     self.target_node = ial.result[0]
7386     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7387                  self.instance_name, self.lu.op.iallocator,
7388                  utils.CommaJoin(ial.result))
7389
7390   def _WaitUntilSync(self):
7391     """Poll with custom rpc for disk sync.
7392
7393     This uses our own step-based rpc call.
7394
7395     """
7396     self.feedback_fn("* wait until resync is done")
7397     all_done = False
7398     while not all_done:
7399       all_done = True
7400       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7401                                             self.nodes_ip,
7402                                             self.instance.disks)
7403       min_percent = 100
7404       for node, nres in result.items():
7405         nres.Raise("Cannot resync disks on node %s" % node)
7406         node_done, node_percent = nres.payload
7407         all_done = all_done and node_done
7408         if node_percent is not None:
7409           min_percent = min(min_percent, node_percent)
7410       if not all_done:
7411         if min_percent < 100:
7412           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7413         time.sleep(2)
7414
7415   def _EnsureSecondary(self, node):
7416     """Demote a node to secondary.
7417
7418     """
7419     self.feedback_fn("* switching node %s to secondary mode" % node)
7420
7421     for dev in self.instance.disks:
7422       self.cfg.SetDiskID(dev, node)
7423
7424     result = self.rpc.call_blockdev_close(node, self.instance.name,
7425                                           self.instance.disks)
7426     result.Raise("Cannot change disk to secondary on node %s" % node)
7427
7428   def _GoStandalone(self):
7429     """Disconnect from the network.
7430
7431     """
7432     self.feedback_fn("* changing into standalone mode")
7433     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7434                                                self.instance.disks)
7435     for node, nres in result.items():
7436       nres.Raise("Cannot disconnect disks node %s" % node)
7437
7438   def _GoReconnect(self, multimaster):
7439     """Reconnect to the network.
7440
7441     """
7442     if multimaster:
7443       msg = "dual-master"
7444     else:
7445       msg = "single-master"
7446     self.feedback_fn("* changing disks into %s mode" % msg)
7447     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7448                                            self.instance.disks,
7449                                            self.instance.name, multimaster)
7450     for node, nres in result.items():
7451       nres.Raise("Cannot change disks config on node %s" % node)
7452
7453   def _ExecCleanup(self):
7454     """Try to cleanup after a failed migration.
7455
7456     The cleanup is done by:
7457       - check that the instance is running only on one node
7458         (and update the config if needed)
7459       - change disks on its secondary node to secondary
7460       - wait until disks are fully synchronized
7461       - disconnect from the network
7462       - change disks into single-master mode
7463       - wait again until disks are fully synchronized
7464
7465     """
7466     instance = self.instance
7467     target_node = self.target_node
7468     source_node = self.source_node
7469
7470     # check running on only one node
7471     self.feedback_fn("* checking where the instance actually runs"
7472                      " (if this hangs, the hypervisor might be in"
7473                      " a bad state)")
7474     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7475     for node, result in ins_l.items():
7476       result.Raise("Can't contact node %s" % node)
7477
7478     runningon_source = instance.name in ins_l[source_node].payload
7479     runningon_target = instance.name in ins_l[target_node].payload
7480
7481     if runningon_source and runningon_target:
7482       raise errors.OpExecError("Instance seems to be running on two nodes,"
7483                                " or the hypervisor is confused; you will have"
7484                                " to ensure manually that it runs only on one"
7485                                " and restart this operation")
7486
7487     if not (runningon_source or runningon_target):
7488       raise errors.OpExecError("Instance does not seem to be running at all;"
7489                                " in this case it's safer to repair by"
7490                                " running 'gnt-instance stop' to ensure disk"
7491                                " shutdown, and then restarting it")
7492
7493     if runningon_target:
7494       # the migration has actually succeeded, we need to update the config
7495       self.feedback_fn("* instance running on secondary node (%s),"
7496                        " updating config" % target_node)
7497       instance.primary_node = target_node
7498       self.cfg.Update(instance, self.feedback_fn)
7499       demoted_node = source_node
7500     else:
7501       self.feedback_fn("* instance confirmed to be running on its"
7502                        " primary node (%s)" % source_node)
7503       demoted_node = target_node
7504
7505     if instance.disk_template in constants.DTS_INT_MIRROR:
7506       self._EnsureSecondary(demoted_node)
7507       try:
7508         self._WaitUntilSync()
7509       except errors.OpExecError:
7510         # we ignore here errors, since if the device is standalone, it
7511         # won't be able to sync
7512         pass
7513       self._GoStandalone()
7514       self._GoReconnect(False)
7515       self._WaitUntilSync()
7516
7517     self.feedback_fn("* done")
7518
7519   def _RevertDiskStatus(self):
7520     """Try to revert the disk status after a failed migration.
7521
7522     """
7523     target_node = self.target_node
7524     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7525       return
7526
7527     try:
7528       self._EnsureSecondary(target_node)
7529       self._GoStandalone()
7530       self._GoReconnect(False)
7531       self._WaitUntilSync()
7532     except errors.OpExecError, err:
7533       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7534                          " please try to recover the instance manually;"
7535                          " error '%s'" % str(err))
7536
7537   def _AbortMigration(self):
7538     """Call the hypervisor code to abort a started migration.
7539
7540     """
7541     instance = self.instance
7542     target_node = self.target_node
7543     source_node = self.source_node
7544     migration_info = self.migration_info
7545
7546     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7547                                                                  instance,
7548                                                                  migration_info,
7549                                                                  False)
7550     abort_msg = abort_result.fail_msg
7551     if abort_msg:
7552       logging.error("Aborting migration failed on target node %s: %s",
7553                     target_node, abort_msg)
7554       # Don't raise an exception here, as we stil have to try to revert the
7555       # disk status, even if this step failed.
7556
7557     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7558         instance, False, self.live)
7559     abort_msg = abort_result.fail_msg
7560     if abort_msg:
7561       logging.error("Aborting migration failed on source node %s: %s",
7562                     source_node, abort_msg)
7563
7564   def _ExecMigration(self):
7565     """Migrate an instance.
7566
7567     The migrate is done by:
7568       - change the disks into dual-master mode
7569       - wait until disks are fully synchronized again
7570       - migrate the instance
7571       - change disks on the new secondary node (the old primary) to secondary
7572       - wait until disks are fully synchronized
7573       - change disks into single-master mode
7574
7575     """
7576     instance = self.instance
7577     target_node = self.target_node
7578     source_node = self.source_node
7579
7580     # Check for hypervisor version mismatch and warn the user.
7581     nodeinfo = self.rpc.call_node_info([source_node, target_node],
7582                                        None, self.instance.hypervisor)
7583     src_info = nodeinfo[source_node]
7584     dst_info = nodeinfo[target_node]
7585
7586     if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7587         (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7588       src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7589       dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7590       if src_version != dst_version:
7591         self.feedback_fn("* warning: hypervisor version mismatch between"
7592                          " source (%s) and target (%s) node" %
7593                          (src_version, dst_version))
7594
7595     self.feedback_fn("* checking disk consistency between source and target")
7596     for dev in instance.disks:
7597       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7598         raise errors.OpExecError("Disk %s is degraded or not fully"
7599                                  " synchronized on target node,"
7600                                  " aborting migration" % dev.iv_name)
7601
7602     # First get the migration information from the remote node
7603     result = self.rpc.call_migration_info(source_node, instance)
7604     msg = result.fail_msg
7605     if msg:
7606       log_err = ("Failed fetching source migration information from %s: %s" %
7607                  (source_node, msg))
7608       logging.error(log_err)
7609       raise errors.OpExecError(log_err)
7610
7611     self.migration_info = migration_info = result.payload
7612
7613     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7614       # Then switch the disks to master/master mode
7615       self._EnsureSecondary(target_node)
7616       self._GoStandalone()
7617       self._GoReconnect(True)
7618       self._WaitUntilSync()
7619
7620     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7621     result = self.rpc.call_accept_instance(target_node,
7622                                            instance,
7623                                            migration_info,
7624                                            self.nodes_ip[target_node])
7625
7626     msg = result.fail_msg
7627     if msg:
7628       logging.error("Instance pre-migration failed, trying to revert"
7629                     " disk status: %s", msg)
7630       self.feedback_fn("Pre-migration failed, aborting")
7631       self._AbortMigration()
7632       self._RevertDiskStatus()
7633       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7634                                (instance.name, msg))
7635
7636     self.feedback_fn("* migrating instance to %s" % target_node)
7637     result = self.rpc.call_instance_migrate(source_node, instance,
7638                                             self.nodes_ip[target_node],
7639                                             self.live)
7640     msg = result.fail_msg
7641     if msg:
7642       logging.error("Instance migration failed, trying to revert"
7643                     " disk status: %s", msg)
7644       self.feedback_fn("Migration failed, aborting")
7645       self._AbortMigration()
7646       self._RevertDiskStatus()
7647       raise errors.OpExecError("Could not migrate instance %s: %s" %
7648                                (instance.name, msg))
7649
7650     self.feedback_fn("* starting memory transfer")
7651     last_feedback = time.time()
7652     while True:
7653       result = self.rpc.call_instance_get_migration_status(source_node,
7654                                                            instance)
7655       msg = result.fail_msg
7656       ms = result.payload   # MigrationStatus instance
7657       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7658         logging.error("Instance migration failed, trying to revert"
7659                       " disk status: %s", msg)
7660         self.feedback_fn("Migration failed, aborting")
7661         self._AbortMigration()
7662         self._RevertDiskStatus()
7663         raise errors.OpExecError("Could not migrate instance %s: %s" %
7664                                  (instance.name, msg))
7665
7666       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7667         self.feedback_fn("* memory transfer complete")
7668         break
7669
7670       if (utils.TimeoutExpired(last_feedback,
7671                                self._MIGRATION_FEEDBACK_INTERVAL) and
7672           ms.transferred_ram is not None):
7673         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7674         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7675         last_feedback = time.time()
7676
7677       time.sleep(self._MIGRATION_POLL_INTERVAL)
7678
7679     result = self.rpc.call_instance_finalize_migration_src(source_node,
7680                                                            instance,
7681                                                            True,
7682                                                            self.live)
7683     msg = result.fail_msg
7684     if msg:
7685       logging.error("Instance migration succeeded, but finalization failed"
7686                     " on the source node: %s", msg)
7687       raise errors.OpExecError("Could not finalize instance migration: %s" %
7688                                msg)
7689
7690     instance.primary_node = target_node
7691
7692     # distribute new instance config to the other nodes
7693     self.cfg.Update(instance, self.feedback_fn)
7694
7695     result = self.rpc.call_instance_finalize_migration_dst(target_node,
7696                                                            instance,
7697                                                            migration_info,
7698                                                            True)
7699     msg = result.fail_msg
7700     if msg:
7701       logging.error("Instance migration succeeded, but finalization failed"
7702                     " on the target node: %s", msg)
7703       raise errors.OpExecError("Could not finalize instance migration: %s" %
7704                                msg)
7705
7706     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7707       self._EnsureSecondary(source_node)
7708       self._WaitUntilSync()
7709       self._GoStandalone()
7710       self._GoReconnect(False)
7711       self._WaitUntilSync()
7712
7713     self.feedback_fn("* done")
7714
7715   def _ExecFailover(self):
7716     """Failover an instance.
7717
7718     The failover is done by shutting it down on its present node and
7719     starting it on the secondary.
7720
7721     """
7722     instance = self.instance
7723     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7724
7725     source_node = instance.primary_node
7726     target_node = self.target_node
7727
7728     if instance.admin_up:
7729       self.feedback_fn("* checking disk consistency between source and target")
7730       for dev in instance.disks:
7731         # for drbd, these are drbd over lvm
7732         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7733           if primary_node.offline:
7734             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7735                              " target node %s" %
7736                              (primary_node.name, dev.iv_name, target_node))
7737           elif not self.ignore_consistency:
7738             raise errors.OpExecError("Disk %s is degraded on target node,"
7739                                      " aborting failover" % dev.iv_name)
7740     else:
7741       self.feedback_fn("* not checking disk consistency as instance is not"
7742                        " running")
7743
7744     self.feedback_fn("* shutting down instance on source node")
7745     logging.info("Shutting down instance %s on node %s",
7746                  instance.name, source_node)
7747
7748     result = self.rpc.call_instance_shutdown(source_node, instance,
7749                                              self.shutdown_timeout)
7750     msg = result.fail_msg
7751     if msg:
7752       if self.ignore_consistency or primary_node.offline:
7753         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7754                            " proceeding anyway; please make sure node"
7755                            " %s is down; error details: %s",
7756                            instance.name, source_node, source_node, msg)
7757       else:
7758         raise errors.OpExecError("Could not shutdown instance %s on"
7759                                  " node %s: %s" %
7760                                  (instance.name, source_node, msg))
7761
7762     self.feedback_fn("* deactivating the instance's disks on source node")
7763     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7764       raise errors.OpExecError("Can't shut down the instance's disks")
7765
7766     instance.primary_node = target_node
7767     # distribute new instance config to the other nodes
7768     self.cfg.Update(instance, self.feedback_fn)
7769
7770     # Only start the instance if it's marked as up
7771     if instance.admin_up:
7772       self.feedback_fn("* activating the instance's disks on target node %s" %
7773                        target_node)
7774       logging.info("Starting instance %s on node %s",
7775                    instance.name, target_node)
7776
7777       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7778                                            ignore_secondaries=True)
7779       if not disks_ok:
7780         _ShutdownInstanceDisks(self.lu, instance)
7781         raise errors.OpExecError("Can't activate the instance's disks")
7782
7783       self.feedback_fn("* starting the instance on the target node %s" %
7784                        target_node)
7785       result = self.rpc.call_instance_start(target_node, (instance, None, None),
7786                                             False)
7787       msg = result.fail_msg
7788       if msg:
7789         _ShutdownInstanceDisks(self.lu, instance)
7790         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7791                                  (instance.name, target_node, msg))
7792
7793   def Exec(self, feedback_fn):
7794     """Perform the migration.
7795
7796     """
7797     self.feedback_fn = feedback_fn
7798     self.source_node = self.instance.primary_node
7799
7800     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7801     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7802       self.target_node = self.instance.secondary_nodes[0]
7803       # Otherwise self.target_node has been populated either
7804       # directly, or through an iallocator.
7805
7806     self.all_nodes = [self.source_node, self.target_node]
7807     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7808                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7809
7810     if self.failover:
7811       feedback_fn("Failover instance %s" % self.instance.name)
7812       self._ExecFailover()
7813     else:
7814       feedback_fn("Migrating instance %s" % self.instance.name)
7815
7816       if self.cleanup:
7817         return self._ExecCleanup()
7818       else:
7819         return self._ExecMigration()
7820
7821
7822 def _CreateBlockDev(lu, node, instance, device, force_create,
7823                     info, force_open):
7824   """Create a tree of block devices on a given node.
7825
7826   If this device type has to be created on secondaries, create it and
7827   all its children.
7828
7829   If not, just recurse to children keeping the same 'force' value.
7830
7831   @param lu: the lu on whose behalf we execute
7832   @param node: the node on which to create the device
7833   @type instance: L{objects.Instance}
7834   @param instance: the instance which owns the device
7835   @type device: L{objects.Disk}
7836   @param device: the device to create
7837   @type force_create: boolean
7838   @param force_create: whether to force creation of this device; this
7839       will be change to True whenever we find a device which has
7840       CreateOnSecondary() attribute
7841   @param info: the extra 'metadata' we should attach to the device
7842       (this will be represented as a LVM tag)
7843   @type force_open: boolean
7844   @param force_open: this parameter will be passes to the
7845       L{backend.BlockdevCreate} function where it specifies
7846       whether we run on primary or not, and it affects both
7847       the child assembly and the device own Open() execution
7848
7849   """
7850   if device.CreateOnSecondary():
7851     force_create = True
7852
7853   if device.children:
7854     for child in device.children:
7855       _CreateBlockDev(lu, node, instance, child, force_create,
7856                       info, force_open)
7857
7858   if not force_create:
7859     return
7860
7861   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7862
7863
7864 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7865   """Create a single block device on a given node.
7866
7867   This will not recurse over children of the device, so they must be
7868   created in advance.
7869
7870   @param lu: the lu on whose behalf we execute
7871   @param node: the node on which to create the device
7872   @type instance: L{objects.Instance}
7873   @param instance: the instance which owns the device
7874   @type device: L{objects.Disk}
7875   @param device: the device to create
7876   @param info: the extra 'metadata' we should attach to the device
7877       (this will be represented as a LVM tag)
7878   @type force_open: boolean
7879   @param force_open: this parameter will be passes to the
7880       L{backend.BlockdevCreate} function where it specifies
7881       whether we run on primary or not, and it affects both
7882       the child assembly and the device own Open() execution
7883
7884   """
7885   lu.cfg.SetDiskID(device, node)
7886   result = lu.rpc.call_blockdev_create(node, device, device.size,
7887                                        instance.name, force_open, info)
7888   result.Raise("Can't create block device %s on"
7889                " node %s for instance %s" % (device, node, instance.name))
7890   if device.physical_id is None:
7891     device.physical_id = result.payload
7892
7893
7894 def _GenerateUniqueNames(lu, exts):
7895   """Generate a suitable LV name.
7896
7897   This will generate a logical volume name for the given instance.
7898
7899   """
7900   results = []
7901   for val in exts:
7902     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7903     results.append("%s%s" % (new_id, val))
7904   return results
7905
7906
7907 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7908                          iv_name, p_minor, s_minor):
7909   """Generate a drbd8 device complete with its children.
7910
7911   """
7912   assert len(vgnames) == len(names) == 2
7913   port = lu.cfg.AllocatePort()
7914   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7915   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7916                           logical_id=(vgnames[0], names[0]))
7917   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7918                           logical_id=(vgnames[1], names[1]))
7919   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7920                           logical_id=(primary, secondary, port,
7921                                       p_minor, s_minor,
7922                                       shared_secret),
7923                           children=[dev_data, dev_meta],
7924                           iv_name=iv_name)
7925   return drbd_dev
7926
7927
7928 def _GenerateDiskTemplate(lu, template_name,
7929                           instance_name, primary_node,
7930                           secondary_nodes, disk_info,
7931                           file_storage_dir, file_driver,
7932                           base_index, feedback_fn):
7933   """Generate the entire disk layout for a given template type.
7934
7935   """
7936   #TODO: compute space requirements
7937
7938   vgname = lu.cfg.GetVGName()
7939   disk_count = len(disk_info)
7940   disks = []
7941   if template_name == constants.DT_DISKLESS:
7942     pass
7943   elif template_name == constants.DT_PLAIN:
7944     if len(secondary_nodes) != 0:
7945       raise errors.ProgrammerError("Wrong template configuration")
7946
7947     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7948                                       for i in range(disk_count)])
7949     for idx, disk in enumerate(disk_info):
7950       disk_index = idx + base_index
7951       vg = disk.get(constants.IDISK_VG, vgname)
7952       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7953       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7954                               size=disk[constants.IDISK_SIZE],
7955                               logical_id=(vg, names[idx]),
7956                               iv_name="disk/%d" % disk_index,
7957                               mode=disk[constants.IDISK_MODE])
7958       disks.append(disk_dev)
7959   elif template_name == constants.DT_DRBD8:
7960     if len(secondary_nodes) != 1:
7961       raise errors.ProgrammerError("Wrong template configuration")
7962     remote_node = secondary_nodes[0]
7963     minors = lu.cfg.AllocateDRBDMinor(
7964       [primary_node, remote_node] * len(disk_info), instance_name)
7965
7966     names = []
7967     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7968                                                for i in range(disk_count)]):
7969       names.append(lv_prefix + "_data")
7970       names.append(lv_prefix + "_meta")
7971     for idx, disk in enumerate(disk_info):
7972       disk_index = idx + base_index
7973       data_vg = disk.get(constants.IDISK_VG, vgname)
7974       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7975       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7976                                       disk[constants.IDISK_SIZE],
7977                                       [data_vg, meta_vg],
7978                                       names[idx * 2:idx * 2 + 2],
7979                                       "disk/%d" % disk_index,
7980                                       minors[idx * 2], minors[idx * 2 + 1])
7981       disk_dev.mode = disk[constants.IDISK_MODE]
7982       disks.append(disk_dev)
7983   elif template_name == constants.DT_FILE:
7984     if len(secondary_nodes) != 0:
7985       raise errors.ProgrammerError("Wrong template configuration")
7986
7987     opcodes.RequireFileStorage()
7988
7989     for idx, disk in enumerate(disk_info):
7990       disk_index = idx + base_index
7991       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7992                               size=disk[constants.IDISK_SIZE],
7993                               iv_name="disk/%d" % disk_index,
7994                               logical_id=(file_driver,
7995                                           "%s/disk%d" % (file_storage_dir,
7996                                                          disk_index)),
7997                               mode=disk[constants.IDISK_MODE])
7998       disks.append(disk_dev)
7999   elif template_name == constants.DT_SHARED_FILE:
8000     if len(secondary_nodes) != 0:
8001       raise errors.ProgrammerError("Wrong template configuration")
8002
8003     opcodes.RequireSharedFileStorage()
8004
8005     for idx, disk in enumerate(disk_info):
8006       disk_index = idx + base_index
8007       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8008                               size=disk[constants.IDISK_SIZE],
8009                               iv_name="disk/%d" % disk_index,
8010                               logical_id=(file_driver,
8011                                           "%s/disk%d" % (file_storage_dir,
8012                                                          disk_index)),
8013                               mode=disk[constants.IDISK_MODE])
8014       disks.append(disk_dev)
8015   elif template_name == constants.DT_BLOCK:
8016     if len(secondary_nodes) != 0:
8017       raise errors.ProgrammerError("Wrong template configuration")
8018
8019     for idx, disk in enumerate(disk_info):
8020       disk_index = idx + base_index
8021       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8022                               size=disk[constants.IDISK_SIZE],
8023                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8024                                           disk[constants.IDISK_ADOPT]),
8025                               iv_name="disk/%d" % disk_index,
8026                               mode=disk[constants.IDISK_MODE])
8027       disks.append(disk_dev)
8028
8029   else:
8030     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8031   return disks
8032
8033
8034 def _GetInstanceInfoText(instance):
8035   """Compute that text that should be added to the disk's metadata.
8036
8037   """
8038   return "originstname+%s" % instance.name
8039
8040
8041 def _CalcEta(time_taken, written, total_size):
8042   """Calculates the ETA based on size written and total size.
8043
8044   @param time_taken: The time taken so far
8045   @param written: amount written so far
8046   @param total_size: The total size of data to be written
8047   @return: The remaining time in seconds
8048
8049   """
8050   avg_time = time_taken / float(written)
8051   return (total_size - written) * avg_time
8052
8053
8054 def _WipeDisks(lu, instance):
8055   """Wipes instance disks.
8056
8057   @type lu: L{LogicalUnit}
8058   @param lu: the logical unit on whose behalf we execute
8059   @type instance: L{objects.Instance}
8060   @param instance: the instance whose disks we should create
8061   @return: the success of the wipe
8062
8063   """
8064   node = instance.primary_node
8065
8066   for device in instance.disks:
8067     lu.cfg.SetDiskID(device, node)
8068
8069   logging.info("Pause sync of instance %s disks", instance.name)
8070   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8071
8072   for idx, success in enumerate(result.payload):
8073     if not success:
8074       logging.warn("pause-sync of instance %s for disks %d failed",
8075                    instance.name, idx)
8076
8077   try:
8078     for idx, device in enumerate(instance.disks):
8079       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8080       # MAX_WIPE_CHUNK at max
8081       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8082                             constants.MIN_WIPE_CHUNK_PERCENT)
8083       # we _must_ make this an int, otherwise rounding errors will
8084       # occur
8085       wipe_chunk_size = int(wipe_chunk_size)
8086
8087       lu.LogInfo("* Wiping disk %d", idx)
8088       logging.info("Wiping disk %d for instance %s, node %s using"
8089                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8090
8091       offset = 0
8092       size = device.size
8093       last_output = 0
8094       start_time = time.time()
8095
8096       while offset < size:
8097         wipe_size = min(wipe_chunk_size, size - offset)
8098         logging.debug("Wiping disk %d, offset %s, chunk %s",
8099                       idx, offset, wipe_size)
8100         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8101         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8102                      (idx, offset, wipe_size))
8103         now = time.time()
8104         offset += wipe_size
8105         if now - last_output >= 60:
8106           eta = _CalcEta(now - start_time, offset, size)
8107           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8108                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8109           last_output = now
8110   finally:
8111     logging.info("Resume sync of instance %s disks", instance.name)
8112
8113     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8114
8115     for idx, success in enumerate(result.payload):
8116       if not success:
8117         lu.LogWarning("Resume sync of disk %d failed, please have a"
8118                       " look at the status and troubleshoot the issue", idx)
8119         logging.warn("resume-sync of instance %s for disks %d failed",
8120                      instance.name, idx)
8121
8122
8123 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8124   """Create all disks for an instance.
8125
8126   This abstracts away some work from AddInstance.
8127
8128   @type lu: L{LogicalUnit}
8129   @param lu: the logical unit on whose behalf we execute
8130   @type instance: L{objects.Instance}
8131   @param instance: the instance whose disks we should create
8132   @type to_skip: list
8133   @param to_skip: list of indices to skip
8134   @type target_node: string
8135   @param target_node: if passed, overrides the target node for creation
8136   @rtype: boolean
8137   @return: the success of the creation
8138
8139   """
8140   info = _GetInstanceInfoText(instance)
8141   if target_node is None:
8142     pnode = instance.primary_node
8143     all_nodes = instance.all_nodes
8144   else:
8145     pnode = target_node
8146     all_nodes = [pnode]
8147
8148   if instance.disk_template in constants.DTS_FILEBASED:
8149     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8150     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8151
8152     result.Raise("Failed to create directory '%s' on"
8153                  " node %s" % (file_storage_dir, pnode))
8154
8155   # Note: this needs to be kept in sync with adding of disks in
8156   # LUInstanceSetParams
8157   for idx, device in enumerate(instance.disks):
8158     if to_skip and idx in to_skip:
8159       continue
8160     logging.info("Creating volume %s for instance %s",
8161                  device.iv_name, instance.name)
8162     #HARDCODE
8163     for node in all_nodes:
8164       f_create = node == pnode
8165       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8166
8167
8168 def _RemoveDisks(lu, instance, target_node=None):
8169   """Remove all disks for an instance.
8170
8171   This abstracts away some work from `AddInstance()` and
8172   `RemoveInstance()`. Note that in case some of the devices couldn't
8173   be removed, the removal will continue with the other ones (compare
8174   with `_CreateDisks()`).
8175
8176   @type lu: L{LogicalUnit}
8177   @param lu: the logical unit on whose behalf we execute
8178   @type instance: L{objects.Instance}
8179   @param instance: the instance whose disks we should remove
8180   @type target_node: string
8181   @param target_node: used to override the node on which to remove the disks
8182   @rtype: boolean
8183   @return: the success of the removal
8184
8185   """
8186   logging.info("Removing block devices for instance %s", instance.name)
8187
8188   all_result = True
8189   for device in instance.disks:
8190     if target_node:
8191       edata = [(target_node, device)]
8192     else:
8193       edata = device.ComputeNodeTree(instance.primary_node)
8194     for node, disk in edata:
8195       lu.cfg.SetDiskID(disk, node)
8196       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8197       if msg:
8198         lu.LogWarning("Could not remove block device %s on node %s,"
8199                       " continuing anyway: %s", device.iv_name, node, msg)
8200         all_result = False
8201
8202   if instance.disk_template == constants.DT_FILE:
8203     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8204     if target_node:
8205       tgt = target_node
8206     else:
8207       tgt = instance.primary_node
8208     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8209     if result.fail_msg:
8210       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8211                     file_storage_dir, instance.primary_node, result.fail_msg)
8212       all_result = False
8213
8214   return all_result
8215
8216
8217 def _ComputeDiskSizePerVG(disk_template, disks):
8218   """Compute disk size requirements in the volume group
8219
8220   """
8221   def _compute(disks, payload):
8222     """Universal algorithm.
8223
8224     """
8225     vgs = {}
8226     for disk in disks:
8227       vgs[disk[constants.IDISK_VG]] = \
8228         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8229
8230     return vgs
8231
8232   # Required free disk space as a function of disk and swap space
8233   req_size_dict = {
8234     constants.DT_DISKLESS: {},
8235     constants.DT_PLAIN: _compute(disks, 0),
8236     # 128 MB are added for drbd metadata for each disk
8237     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8238     constants.DT_FILE: {},
8239     constants.DT_SHARED_FILE: {},
8240   }
8241
8242   if disk_template not in req_size_dict:
8243     raise errors.ProgrammerError("Disk template '%s' size requirement"
8244                                  " is unknown" % disk_template)
8245
8246   return req_size_dict[disk_template]
8247
8248
8249 def _ComputeDiskSize(disk_template, disks):
8250   """Compute disk size requirements in the volume group
8251
8252   """
8253   # Required free disk space as a function of disk and swap space
8254   req_size_dict = {
8255     constants.DT_DISKLESS: None,
8256     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8257     # 128 MB are added for drbd metadata for each disk
8258     constants.DT_DRBD8:
8259       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8260     constants.DT_FILE: None,
8261     constants.DT_SHARED_FILE: 0,
8262     constants.DT_BLOCK: 0,
8263   }
8264
8265   if disk_template not in req_size_dict:
8266     raise errors.ProgrammerError("Disk template '%s' size requirement"
8267                                  " is unknown" % disk_template)
8268
8269   return req_size_dict[disk_template]
8270
8271
8272 def _FilterVmNodes(lu, nodenames):
8273   """Filters out non-vm_capable nodes from a list.
8274
8275   @type lu: L{LogicalUnit}
8276   @param lu: the logical unit for which we check
8277   @type nodenames: list
8278   @param nodenames: the list of nodes on which we should check
8279   @rtype: list
8280   @return: the list of vm-capable nodes
8281
8282   """
8283   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8284   return [name for name in nodenames if name not in vm_nodes]
8285
8286
8287 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8288   """Hypervisor parameter validation.
8289
8290   This function abstract the hypervisor parameter validation to be
8291   used in both instance create and instance modify.
8292
8293   @type lu: L{LogicalUnit}
8294   @param lu: the logical unit for which we check
8295   @type nodenames: list
8296   @param nodenames: the list of nodes on which we should check
8297   @type hvname: string
8298   @param hvname: the name of the hypervisor we should use
8299   @type hvparams: dict
8300   @param hvparams: the parameters which we need to check
8301   @raise errors.OpPrereqError: if the parameters are not valid
8302
8303   """
8304   nodenames = _FilterVmNodes(lu, nodenames)
8305
8306   cluster = lu.cfg.GetClusterInfo()
8307   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8308
8309   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8310   for node in nodenames:
8311     info = hvinfo[node]
8312     if info.offline:
8313       continue
8314     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8315
8316
8317 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8318   """OS parameters validation.
8319
8320   @type lu: L{LogicalUnit}
8321   @param lu: the logical unit for which we check
8322   @type required: boolean
8323   @param required: whether the validation should fail if the OS is not
8324       found
8325   @type nodenames: list
8326   @param nodenames: the list of nodes on which we should check
8327   @type osname: string
8328   @param osname: the name of the hypervisor we should use
8329   @type osparams: dict
8330   @param osparams: the parameters which we need to check
8331   @raise errors.OpPrereqError: if the parameters are not valid
8332
8333   """
8334   nodenames = _FilterVmNodes(lu, nodenames)
8335   result = lu.rpc.call_os_validate(nodenames, required, osname,
8336                                    [constants.OS_VALIDATE_PARAMETERS],
8337                                    osparams)
8338   for node, nres in result.items():
8339     # we don't check for offline cases since this should be run only
8340     # against the master node and/or an instance's nodes
8341     nres.Raise("OS Parameters validation failed on node %s" % node)
8342     if not nres.payload:
8343       lu.LogInfo("OS %s not found on node %s, validation skipped",
8344                  osname, node)
8345
8346
8347 class LUInstanceCreate(LogicalUnit):
8348   """Create an instance.
8349
8350   """
8351   HPATH = "instance-add"
8352   HTYPE = constants.HTYPE_INSTANCE
8353   REQ_BGL = False
8354
8355   def CheckArguments(self):
8356     """Check arguments.
8357
8358     """
8359     # do not require name_check to ease forward/backward compatibility
8360     # for tools
8361     if self.op.no_install and self.op.start:
8362       self.LogInfo("No-installation mode selected, disabling startup")
8363       self.op.start = False
8364     # validate/normalize the instance name
8365     self.op.instance_name = \
8366       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8367
8368     if self.op.ip_check and not self.op.name_check:
8369       # TODO: make the ip check more flexible and not depend on the name check
8370       raise errors.OpPrereqError("Cannot do IP address check without a name"
8371                                  " check", errors.ECODE_INVAL)
8372
8373     # check nics' parameter names
8374     for nic in self.op.nics:
8375       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8376
8377     # check disks. parameter names and consistent adopt/no-adopt strategy
8378     has_adopt = has_no_adopt = False
8379     for disk in self.op.disks:
8380       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8381       if constants.IDISK_ADOPT in disk:
8382         has_adopt = True
8383       else:
8384         has_no_adopt = True
8385     if has_adopt and has_no_adopt:
8386       raise errors.OpPrereqError("Either all disks are adopted or none is",
8387                                  errors.ECODE_INVAL)
8388     if has_adopt:
8389       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8390         raise errors.OpPrereqError("Disk adoption is not supported for the"
8391                                    " '%s' disk template" %
8392                                    self.op.disk_template,
8393                                    errors.ECODE_INVAL)
8394       if self.op.iallocator is not None:
8395         raise errors.OpPrereqError("Disk adoption not allowed with an"
8396                                    " iallocator script", errors.ECODE_INVAL)
8397       if self.op.mode == constants.INSTANCE_IMPORT:
8398         raise errors.OpPrereqError("Disk adoption not allowed for"
8399                                    " instance import", errors.ECODE_INVAL)
8400     else:
8401       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8402         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8403                                    " but no 'adopt' parameter given" %
8404                                    self.op.disk_template,
8405                                    errors.ECODE_INVAL)
8406
8407     self.adopt_disks = has_adopt
8408
8409     # instance name verification
8410     if self.op.name_check:
8411       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8412       self.op.instance_name = self.hostname1.name
8413       # used in CheckPrereq for ip ping check
8414       self.check_ip = self.hostname1.ip
8415     else:
8416       self.check_ip = None
8417
8418     # file storage checks
8419     if (self.op.file_driver and
8420         not self.op.file_driver in constants.FILE_DRIVER):
8421       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8422                                  self.op.file_driver, errors.ECODE_INVAL)
8423
8424     if self.op.disk_template == constants.DT_FILE:
8425       opcodes.RequireFileStorage()
8426     elif self.op.disk_template == constants.DT_SHARED_FILE:
8427       opcodes.RequireSharedFileStorage()
8428
8429     ### Node/iallocator related checks
8430     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8431
8432     if self.op.pnode is not None:
8433       if self.op.disk_template in constants.DTS_INT_MIRROR:
8434         if self.op.snode is None:
8435           raise errors.OpPrereqError("The networked disk templates need"
8436                                      " a mirror node", errors.ECODE_INVAL)
8437       elif self.op.snode:
8438         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8439                         " template")
8440         self.op.snode = None
8441
8442     self._cds = _GetClusterDomainSecret()
8443
8444     if self.op.mode == constants.INSTANCE_IMPORT:
8445       # On import force_variant must be True, because if we forced it at
8446       # initial install, our only chance when importing it back is that it
8447       # works again!
8448       self.op.force_variant = True
8449
8450       if self.op.no_install:
8451         self.LogInfo("No-installation mode has no effect during import")
8452
8453     elif self.op.mode == constants.INSTANCE_CREATE:
8454       if self.op.os_type is None:
8455         raise errors.OpPrereqError("No guest OS specified",
8456                                    errors.ECODE_INVAL)
8457       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8458         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8459                                    " installation" % self.op.os_type,
8460                                    errors.ECODE_STATE)
8461       if self.op.disk_template is None:
8462         raise errors.OpPrereqError("No disk template specified",
8463                                    errors.ECODE_INVAL)
8464
8465     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8466       # Check handshake to ensure both clusters have the same domain secret
8467       src_handshake = self.op.source_handshake
8468       if not src_handshake:
8469         raise errors.OpPrereqError("Missing source handshake",
8470                                    errors.ECODE_INVAL)
8471
8472       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8473                                                            src_handshake)
8474       if errmsg:
8475         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8476                                    errors.ECODE_INVAL)
8477
8478       # Load and check source CA
8479       self.source_x509_ca_pem = self.op.source_x509_ca
8480       if not self.source_x509_ca_pem:
8481         raise errors.OpPrereqError("Missing source X509 CA",
8482                                    errors.ECODE_INVAL)
8483
8484       try:
8485         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8486                                                     self._cds)
8487       except OpenSSL.crypto.Error, err:
8488         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8489                                    (err, ), errors.ECODE_INVAL)
8490
8491       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8492       if errcode is not None:
8493         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8494                                    errors.ECODE_INVAL)
8495
8496       self.source_x509_ca = cert
8497
8498       src_instance_name = self.op.source_instance_name
8499       if not src_instance_name:
8500         raise errors.OpPrereqError("Missing source instance name",
8501                                    errors.ECODE_INVAL)
8502
8503       self.source_instance_name = \
8504           netutils.GetHostname(name=src_instance_name).name
8505
8506     else:
8507       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8508                                  self.op.mode, errors.ECODE_INVAL)
8509
8510   def ExpandNames(self):
8511     """ExpandNames for CreateInstance.
8512
8513     Figure out the right locks for instance creation.
8514
8515     """
8516     self.needed_locks = {}
8517
8518     instance_name = self.op.instance_name
8519     # this is just a preventive check, but someone might still add this
8520     # instance in the meantime, and creation will fail at lock-add time
8521     if instance_name in self.cfg.GetInstanceList():
8522       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8523                                  instance_name, errors.ECODE_EXISTS)
8524
8525     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8526
8527     if self.op.iallocator:
8528       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8529     else:
8530       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8531       nodelist = [self.op.pnode]
8532       if self.op.snode is not None:
8533         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8534         nodelist.append(self.op.snode)
8535       self.needed_locks[locking.LEVEL_NODE] = nodelist
8536
8537     # in case of import lock the source node too
8538     if self.op.mode == constants.INSTANCE_IMPORT:
8539       src_node = self.op.src_node
8540       src_path = self.op.src_path
8541
8542       if src_path is None:
8543         self.op.src_path = src_path = self.op.instance_name
8544
8545       if src_node is None:
8546         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8547         self.op.src_node = None
8548         if os.path.isabs(src_path):
8549           raise errors.OpPrereqError("Importing an instance from a path"
8550                                      " requires a source node option",
8551                                      errors.ECODE_INVAL)
8552       else:
8553         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8554         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8555           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8556         if not os.path.isabs(src_path):
8557           self.op.src_path = src_path = \
8558             utils.PathJoin(constants.EXPORT_DIR, src_path)
8559
8560   def _RunAllocator(self):
8561     """Run the allocator based on input opcode.
8562
8563     """
8564     nics = [n.ToDict() for n in self.nics]
8565     ial = IAllocator(self.cfg, self.rpc,
8566                      mode=constants.IALLOCATOR_MODE_ALLOC,
8567                      name=self.op.instance_name,
8568                      disk_template=self.op.disk_template,
8569                      tags=self.op.tags,
8570                      os=self.op.os_type,
8571                      vcpus=self.be_full[constants.BE_VCPUS],
8572                      memory=self.be_full[constants.BE_MEMORY],
8573                      disks=self.disks,
8574                      nics=nics,
8575                      hypervisor=self.op.hypervisor,
8576                      )
8577
8578     ial.Run(self.op.iallocator)
8579
8580     if not ial.success:
8581       raise errors.OpPrereqError("Can't compute nodes using"
8582                                  " iallocator '%s': %s" %
8583                                  (self.op.iallocator, ial.info),
8584                                  errors.ECODE_NORES)
8585     if len(ial.result) != ial.required_nodes:
8586       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8587                                  " of nodes (%s), required %s" %
8588                                  (self.op.iallocator, len(ial.result),
8589                                   ial.required_nodes), errors.ECODE_FAULT)
8590     self.op.pnode = ial.result[0]
8591     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8592                  self.op.instance_name, self.op.iallocator,
8593                  utils.CommaJoin(ial.result))
8594     if ial.required_nodes == 2:
8595       self.op.snode = ial.result[1]
8596
8597   def BuildHooksEnv(self):
8598     """Build hooks env.
8599
8600     This runs on master, primary and secondary nodes of the instance.
8601
8602     """
8603     env = {
8604       "ADD_MODE": self.op.mode,
8605       }
8606     if self.op.mode == constants.INSTANCE_IMPORT:
8607       env["SRC_NODE"] = self.op.src_node
8608       env["SRC_PATH"] = self.op.src_path
8609       env["SRC_IMAGES"] = self.src_images
8610
8611     env.update(_BuildInstanceHookEnv(
8612       name=self.op.instance_name,
8613       primary_node=self.op.pnode,
8614       secondary_nodes=self.secondaries,
8615       status=self.op.start,
8616       os_type=self.op.os_type,
8617       memory=self.be_full[constants.BE_MEMORY],
8618       vcpus=self.be_full[constants.BE_VCPUS],
8619       nics=_NICListToTuple(self, self.nics),
8620       disk_template=self.op.disk_template,
8621       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8622              for d in self.disks],
8623       bep=self.be_full,
8624       hvp=self.hv_full,
8625       hypervisor_name=self.op.hypervisor,
8626       tags=self.op.tags,
8627     ))
8628
8629     return env
8630
8631   def BuildHooksNodes(self):
8632     """Build hooks nodes.
8633
8634     """
8635     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8636     return nl, nl
8637
8638   def _ReadExportInfo(self):
8639     """Reads the export information from disk.
8640
8641     It will override the opcode source node and path with the actual
8642     information, if these two were not specified before.
8643
8644     @return: the export information
8645
8646     """
8647     assert self.op.mode == constants.INSTANCE_IMPORT
8648
8649     src_node = self.op.src_node
8650     src_path = self.op.src_path
8651
8652     if src_node is None:
8653       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8654       exp_list = self.rpc.call_export_list(locked_nodes)
8655       found = False
8656       for node in exp_list:
8657         if exp_list[node].fail_msg:
8658           continue
8659         if src_path in exp_list[node].payload:
8660           found = True
8661           self.op.src_node = src_node = node
8662           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8663                                                        src_path)
8664           break
8665       if not found:
8666         raise errors.OpPrereqError("No export found for relative path %s" %
8667                                     src_path, errors.ECODE_INVAL)
8668
8669     _CheckNodeOnline(self, src_node)
8670     result = self.rpc.call_export_info(src_node, src_path)
8671     result.Raise("No export or invalid export found in dir %s" % src_path)
8672
8673     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8674     if not export_info.has_section(constants.INISECT_EXP):
8675       raise errors.ProgrammerError("Corrupted export config",
8676                                    errors.ECODE_ENVIRON)
8677
8678     ei_version = export_info.get(constants.INISECT_EXP, "version")
8679     if (int(ei_version) != constants.EXPORT_VERSION):
8680       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8681                                  (ei_version, constants.EXPORT_VERSION),
8682                                  errors.ECODE_ENVIRON)
8683     return export_info
8684
8685   def _ReadExportParams(self, einfo):
8686     """Use export parameters as defaults.
8687
8688     In case the opcode doesn't specify (as in override) some instance
8689     parameters, then try to use them from the export information, if
8690     that declares them.
8691
8692     """
8693     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8694
8695     if self.op.disk_template is None:
8696       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8697         self.op.disk_template = einfo.get(constants.INISECT_INS,
8698                                           "disk_template")
8699         if self.op.disk_template not in constants.DISK_TEMPLATES:
8700           raise errors.OpPrereqError("Disk template specified in configuration"
8701                                      " file is not one of the allowed values:"
8702                                      " %s" % " ".join(constants.DISK_TEMPLATES))
8703       else:
8704         raise errors.OpPrereqError("No disk template specified and the export"
8705                                    " is missing the disk_template information",
8706                                    errors.ECODE_INVAL)
8707
8708     if not self.op.disks:
8709       disks = []
8710       # TODO: import the disk iv_name too
8711       for idx in range(constants.MAX_DISKS):
8712         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8713           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8714           disks.append({constants.IDISK_SIZE: disk_sz})
8715       self.op.disks = disks
8716       if not disks and self.op.disk_template != constants.DT_DISKLESS:
8717         raise errors.OpPrereqError("No disk info specified and the export"
8718                                    " is missing the disk information",
8719                                    errors.ECODE_INVAL)
8720
8721     if not self.op.nics:
8722       nics = []
8723       for idx in range(constants.MAX_NICS):
8724         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8725           ndict = {}
8726           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8727             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8728             ndict[name] = v
8729           nics.append(ndict)
8730         else:
8731           break
8732       self.op.nics = nics
8733
8734     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8735       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8736
8737     if (self.op.hypervisor is None and
8738         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8739       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8740
8741     if einfo.has_section(constants.INISECT_HYP):
8742       # use the export parameters but do not override the ones
8743       # specified by the user
8744       for name, value in einfo.items(constants.INISECT_HYP):
8745         if name not in self.op.hvparams:
8746           self.op.hvparams[name] = value
8747
8748     if einfo.has_section(constants.INISECT_BEP):
8749       # use the parameters, without overriding
8750       for name, value in einfo.items(constants.INISECT_BEP):
8751         if name not in self.op.beparams:
8752           self.op.beparams[name] = value
8753     else:
8754       # try to read the parameters old style, from the main section
8755       for name in constants.BES_PARAMETERS:
8756         if (name not in self.op.beparams and
8757             einfo.has_option(constants.INISECT_INS, name)):
8758           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8759
8760     if einfo.has_section(constants.INISECT_OSP):
8761       # use the parameters, without overriding
8762       for name, value in einfo.items(constants.INISECT_OSP):
8763         if name not in self.op.osparams:
8764           self.op.osparams[name] = value
8765
8766   def _RevertToDefaults(self, cluster):
8767     """Revert the instance parameters to the default values.
8768
8769     """
8770     # hvparams
8771     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8772     for name in self.op.hvparams.keys():
8773       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8774         del self.op.hvparams[name]
8775     # beparams
8776     be_defs = cluster.SimpleFillBE({})
8777     for name in self.op.beparams.keys():
8778       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8779         del self.op.beparams[name]
8780     # nic params
8781     nic_defs = cluster.SimpleFillNIC({})
8782     for nic in self.op.nics:
8783       for name in constants.NICS_PARAMETERS:
8784         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8785           del nic[name]
8786     # osparams
8787     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8788     for name in self.op.osparams.keys():
8789       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8790         del self.op.osparams[name]
8791
8792   def _CalculateFileStorageDir(self):
8793     """Calculate final instance file storage dir.
8794
8795     """
8796     # file storage dir calculation/check
8797     self.instance_file_storage_dir = None
8798     if self.op.disk_template in constants.DTS_FILEBASED:
8799       # build the full file storage dir path
8800       joinargs = []
8801
8802       if self.op.disk_template == constants.DT_SHARED_FILE:
8803         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8804       else:
8805         get_fsd_fn = self.cfg.GetFileStorageDir
8806
8807       cfg_storagedir = get_fsd_fn()
8808       if not cfg_storagedir:
8809         raise errors.OpPrereqError("Cluster file storage dir not defined")
8810       joinargs.append(cfg_storagedir)
8811
8812       if self.op.file_storage_dir is not None:
8813         joinargs.append(self.op.file_storage_dir)
8814
8815       joinargs.append(self.op.instance_name)
8816
8817       # pylint: disable=W0142
8818       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8819
8820   def CheckPrereq(self):
8821     """Check prerequisites.
8822
8823     """
8824     self._CalculateFileStorageDir()
8825
8826     if self.op.mode == constants.INSTANCE_IMPORT:
8827       export_info = self._ReadExportInfo()
8828       self._ReadExportParams(export_info)
8829
8830     if (not self.cfg.GetVGName() and
8831         self.op.disk_template not in constants.DTS_NOT_LVM):
8832       raise errors.OpPrereqError("Cluster does not support lvm-based"
8833                                  " instances", errors.ECODE_STATE)
8834
8835     if (self.op.hypervisor is None or
8836         self.op.hypervisor == constants.VALUE_AUTO):
8837       self.op.hypervisor = self.cfg.GetHypervisorType()
8838
8839     cluster = self.cfg.GetClusterInfo()
8840     enabled_hvs = cluster.enabled_hypervisors
8841     if self.op.hypervisor not in enabled_hvs:
8842       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8843                                  " cluster (%s)" % (self.op.hypervisor,
8844                                   ",".join(enabled_hvs)),
8845                                  errors.ECODE_STATE)
8846
8847     # Check tag validity
8848     for tag in self.op.tags:
8849       objects.TaggableObject.ValidateTag(tag)
8850
8851     # check hypervisor parameter syntax (locally)
8852     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8853     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8854                                       self.op.hvparams)
8855     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8856     hv_type.CheckParameterSyntax(filled_hvp)
8857     self.hv_full = filled_hvp
8858     # check that we don't specify global parameters on an instance
8859     _CheckGlobalHvParams(self.op.hvparams)
8860
8861     # fill and remember the beparams dict
8862     default_beparams = cluster.beparams[constants.PP_DEFAULT]
8863     for param, value in self.op.beparams.iteritems():
8864       if value == constants.VALUE_AUTO:
8865         self.op.beparams[param] = default_beparams[param]
8866     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8867     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8868
8869     # build os parameters
8870     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8871
8872     # now that hvp/bep are in final format, let's reset to defaults,
8873     # if told to do so
8874     if self.op.identify_defaults:
8875       self._RevertToDefaults(cluster)
8876
8877     # NIC buildup
8878     self.nics = []
8879     for idx, nic in enumerate(self.op.nics):
8880       nic_mode_req = nic.get(constants.INIC_MODE, None)
8881       nic_mode = nic_mode_req
8882       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8883         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8884
8885       # in routed mode, for the first nic, the default ip is 'auto'
8886       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8887         default_ip_mode = constants.VALUE_AUTO
8888       else:
8889         default_ip_mode = constants.VALUE_NONE
8890
8891       # ip validity checks
8892       ip = nic.get(constants.INIC_IP, default_ip_mode)
8893       if ip is None or ip.lower() == constants.VALUE_NONE:
8894         nic_ip = None
8895       elif ip.lower() == constants.VALUE_AUTO:
8896         if not self.op.name_check:
8897           raise errors.OpPrereqError("IP address set to auto but name checks"
8898                                      " have been skipped",
8899                                      errors.ECODE_INVAL)
8900         nic_ip = self.hostname1.ip
8901       else:
8902         if not netutils.IPAddress.IsValid(ip):
8903           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8904                                      errors.ECODE_INVAL)
8905         nic_ip = ip
8906
8907       # TODO: check the ip address for uniqueness
8908       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8909         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8910                                    errors.ECODE_INVAL)
8911
8912       # MAC address verification
8913       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8914       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8915         mac = utils.NormalizeAndValidateMac(mac)
8916
8917         try:
8918           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8919         except errors.ReservationError:
8920           raise errors.OpPrereqError("MAC address %s already in use"
8921                                      " in cluster" % mac,
8922                                      errors.ECODE_NOTUNIQUE)
8923
8924       #  Build nic parameters
8925       link = nic.get(constants.INIC_LINK, None)
8926       if link == constants.VALUE_AUTO:
8927         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8928       nicparams = {}
8929       if nic_mode_req:
8930         nicparams[constants.NIC_MODE] = nic_mode
8931       if link:
8932         nicparams[constants.NIC_LINK] = link
8933
8934       check_params = cluster.SimpleFillNIC(nicparams)
8935       objects.NIC.CheckParameterSyntax(check_params)
8936       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8937
8938     # disk checks/pre-build
8939     default_vg = self.cfg.GetVGName()
8940     self.disks = []
8941     for disk in self.op.disks:
8942       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8943       if mode not in constants.DISK_ACCESS_SET:
8944         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8945                                    mode, errors.ECODE_INVAL)
8946       size = disk.get(constants.IDISK_SIZE, None)
8947       if size is None:
8948         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8949       try:
8950         size = int(size)
8951       except (TypeError, ValueError):
8952         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8953                                    errors.ECODE_INVAL)
8954
8955       data_vg = disk.get(constants.IDISK_VG, default_vg)
8956       new_disk = {
8957         constants.IDISK_SIZE: size,
8958         constants.IDISK_MODE: mode,
8959         constants.IDISK_VG: data_vg,
8960         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8961         }
8962       if constants.IDISK_ADOPT in disk:
8963         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8964       self.disks.append(new_disk)
8965
8966     if self.op.mode == constants.INSTANCE_IMPORT:
8967       disk_images = []
8968       for idx in range(len(self.disks)):
8969         option = "disk%d_dump" % idx
8970         if export_info.has_option(constants.INISECT_INS, option):
8971           # FIXME: are the old os-es, disk sizes, etc. useful?
8972           export_name = export_info.get(constants.INISECT_INS, option)
8973           image = utils.PathJoin(self.op.src_path, export_name)
8974           disk_images.append(image)
8975         else:
8976           disk_images.append(False)
8977
8978       self.src_images = disk_images
8979
8980       old_name = export_info.get(constants.INISECT_INS, "name")
8981       if self.op.instance_name == old_name:
8982         for idx, nic in enumerate(self.nics):
8983           if nic.mac == constants.VALUE_AUTO:
8984             nic_mac_ini = "nic%d_mac" % idx
8985             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8986
8987     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8988
8989     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8990     if self.op.ip_check:
8991       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8992         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8993                                    (self.check_ip, self.op.instance_name),
8994                                    errors.ECODE_NOTUNIQUE)
8995
8996     #### mac address generation
8997     # By generating here the mac address both the allocator and the hooks get
8998     # the real final mac address rather than the 'auto' or 'generate' value.
8999     # There is a race condition between the generation and the instance object
9000     # creation, which means that we know the mac is valid now, but we're not
9001     # sure it will be when we actually add the instance. If things go bad
9002     # adding the instance will abort because of a duplicate mac, and the
9003     # creation job will fail.
9004     for nic in self.nics:
9005       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9006         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9007
9008     #### allocator run
9009
9010     if self.op.iallocator is not None:
9011       self._RunAllocator()
9012
9013     #### node related checks
9014
9015     # check primary node
9016     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9017     assert self.pnode is not None, \
9018       "Cannot retrieve locked node %s" % self.op.pnode
9019     if pnode.offline:
9020       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9021                                  pnode.name, errors.ECODE_STATE)
9022     if pnode.drained:
9023       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9024                                  pnode.name, errors.ECODE_STATE)
9025     if not pnode.vm_capable:
9026       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9027                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9028
9029     self.secondaries = []
9030
9031     # mirror node verification
9032     if self.op.disk_template in constants.DTS_INT_MIRROR:
9033       if self.op.snode == pnode.name:
9034         raise errors.OpPrereqError("The secondary node cannot be the"
9035                                    " primary node", errors.ECODE_INVAL)
9036       _CheckNodeOnline(self, self.op.snode)
9037       _CheckNodeNotDrained(self, self.op.snode)
9038       _CheckNodeVmCapable(self, self.op.snode)
9039       self.secondaries.append(self.op.snode)
9040
9041     nodenames = [pnode.name] + self.secondaries
9042
9043     if not self.adopt_disks:
9044       # Check lv size requirements, if not adopting
9045       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9046       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9047
9048     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9049       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9050                                 disk[constants.IDISK_ADOPT])
9051                      for disk in self.disks])
9052       if len(all_lvs) != len(self.disks):
9053         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9054                                    errors.ECODE_INVAL)
9055       for lv_name in all_lvs:
9056         try:
9057           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9058           # to ReserveLV uses the same syntax
9059           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9060         except errors.ReservationError:
9061           raise errors.OpPrereqError("LV named %s used by another instance" %
9062                                      lv_name, errors.ECODE_NOTUNIQUE)
9063
9064       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9065       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9066
9067       node_lvs = self.rpc.call_lv_list([pnode.name],
9068                                        vg_names.payload.keys())[pnode.name]
9069       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9070       node_lvs = node_lvs.payload
9071
9072       delta = all_lvs.difference(node_lvs.keys())
9073       if delta:
9074         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9075                                    utils.CommaJoin(delta),
9076                                    errors.ECODE_INVAL)
9077       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9078       if online_lvs:
9079         raise errors.OpPrereqError("Online logical volumes found, cannot"
9080                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9081                                    errors.ECODE_STATE)
9082       # update the size of disk based on what is found
9083       for dsk in self.disks:
9084         dsk[constants.IDISK_SIZE] = \
9085           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9086                                         dsk[constants.IDISK_ADOPT])][0]))
9087
9088     elif self.op.disk_template == constants.DT_BLOCK:
9089       # Normalize and de-duplicate device paths
9090       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9091                        for disk in self.disks])
9092       if len(all_disks) != len(self.disks):
9093         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9094                                    errors.ECODE_INVAL)
9095       baddisks = [d for d in all_disks
9096                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9097       if baddisks:
9098         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9099                                    " cannot be adopted" %
9100                                    (", ".join(baddisks),
9101                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9102                                    errors.ECODE_INVAL)
9103
9104       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9105                                             list(all_disks))[pnode.name]
9106       node_disks.Raise("Cannot get block device information from node %s" %
9107                        pnode.name)
9108       node_disks = node_disks.payload
9109       delta = all_disks.difference(node_disks.keys())
9110       if delta:
9111         raise errors.OpPrereqError("Missing block device(s): %s" %
9112                                    utils.CommaJoin(delta),
9113                                    errors.ECODE_INVAL)
9114       for dsk in self.disks:
9115         dsk[constants.IDISK_SIZE] = \
9116           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9117
9118     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9119
9120     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9121     # check OS parameters (remotely)
9122     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9123
9124     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9125
9126     # memory check on primary node
9127     if self.op.start:
9128       _CheckNodeFreeMemory(self, self.pnode.name,
9129                            "creating instance %s" % self.op.instance_name,
9130                            self.be_full[constants.BE_MEMORY],
9131                            self.op.hypervisor)
9132
9133     self.dry_run_result = list(nodenames)
9134
9135   def Exec(self, feedback_fn):
9136     """Create and add the instance to the cluster.
9137
9138     """
9139     instance = self.op.instance_name
9140     pnode_name = self.pnode.name
9141
9142     ht_kind = self.op.hypervisor
9143     if ht_kind in constants.HTS_REQ_PORT:
9144       network_port = self.cfg.AllocatePort()
9145     else:
9146       network_port = None
9147
9148     disks = _GenerateDiskTemplate(self,
9149                                   self.op.disk_template,
9150                                   instance, pnode_name,
9151                                   self.secondaries,
9152                                   self.disks,
9153                                   self.instance_file_storage_dir,
9154                                   self.op.file_driver,
9155                                   0,
9156                                   feedback_fn)
9157
9158     iobj = objects.Instance(name=instance, os=self.op.os_type,
9159                             primary_node=pnode_name,
9160                             nics=self.nics, disks=disks,
9161                             disk_template=self.op.disk_template,
9162                             admin_up=False,
9163                             network_port=network_port,
9164                             beparams=self.op.beparams,
9165                             hvparams=self.op.hvparams,
9166                             hypervisor=self.op.hypervisor,
9167                             osparams=self.op.osparams,
9168                             )
9169
9170     if self.op.tags:
9171       for tag in self.op.tags:
9172         iobj.AddTag(tag)
9173
9174     if self.adopt_disks:
9175       if self.op.disk_template == constants.DT_PLAIN:
9176         # rename LVs to the newly-generated names; we need to construct
9177         # 'fake' LV disks with the old data, plus the new unique_id
9178         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9179         rename_to = []
9180         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9181           rename_to.append(t_dsk.logical_id)
9182           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9183           self.cfg.SetDiskID(t_dsk, pnode_name)
9184         result = self.rpc.call_blockdev_rename(pnode_name,
9185                                                zip(tmp_disks, rename_to))
9186         result.Raise("Failed to rename adoped LVs")
9187     else:
9188       feedback_fn("* creating instance disks...")
9189       try:
9190         _CreateDisks(self, iobj)
9191       except errors.OpExecError:
9192         self.LogWarning("Device creation failed, reverting...")
9193         try:
9194           _RemoveDisks(self, iobj)
9195         finally:
9196           self.cfg.ReleaseDRBDMinors(instance)
9197           raise
9198
9199     feedback_fn("adding instance %s to cluster config" % instance)
9200
9201     self.cfg.AddInstance(iobj, self.proc.GetECId())
9202
9203     # Declare that we don't want to remove the instance lock anymore, as we've
9204     # added the instance to the config
9205     del self.remove_locks[locking.LEVEL_INSTANCE]
9206
9207     if self.op.mode == constants.INSTANCE_IMPORT:
9208       # Release unused nodes
9209       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9210     else:
9211       # Release all nodes
9212       _ReleaseLocks(self, locking.LEVEL_NODE)
9213
9214     disk_abort = False
9215     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9216       feedback_fn("* wiping instance disks...")
9217       try:
9218         _WipeDisks(self, iobj)
9219       except errors.OpExecError, err:
9220         logging.exception("Wiping disks failed")
9221         self.LogWarning("Wiping instance disks failed (%s)", err)
9222         disk_abort = True
9223
9224     if disk_abort:
9225       # Something is already wrong with the disks, don't do anything else
9226       pass
9227     elif self.op.wait_for_sync:
9228       disk_abort = not _WaitForSync(self, iobj)
9229     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9230       # make sure the disks are not degraded (still sync-ing is ok)
9231       feedback_fn("* checking mirrors status")
9232       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9233     else:
9234       disk_abort = False
9235
9236     if disk_abort:
9237       _RemoveDisks(self, iobj)
9238       self.cfg.RemoveInstance(iobj.name)
9239       # Make sure the instance lock gets removed
9240       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9241       raise errors.OpExecError("There are some degraded disks for"
9242                                " this instance")
9243
9244     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9245       if self.op.mode == constants.INSTANCE_CREATE:
9246         if not self.op.no_install:
9247           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9248                         not self.op.wait_for_sync)
9249           if pause_sync:
9250             feedback_fn("* pausing disk sync to install instance OS")
9251             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9252                                                               iobj.disks, True)
9253             for idx, success in enumerate(result.payload):
9254               if not success:
9255                 logging.warn("pause-sync of instance %s for disk %d failed",
9256                              instance, idx)
9257
9258           feedback_fn("* running the instance OS create scripts...")
9259           # FIXME: pass debug option from opcode to backend
9260           os_add_result = \
9261             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9262                                           self.op.debug_level)
9263           if pause_sync:
9264             feedback_fn("* resuming disk sync")
9265             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9266                                                               iobj.disks, False)
9267             for idx, success in enumerate(result.payload):
9268               if not success:
9269                 logging.warn("resume-sync of instance %s for disk %d failed",
9270                              instance, idx)
9271
9272           os_add_result.Raise("Could not add os for instance %s"
9273                               " on node %s" % (instance, pnode_name))
9274
9275       elif self.op.mode == constants.INSTANCE_IMPORT:
9276         feedback_fn("* running the instance OS import scripts...")
9277
9278         transfers = []
9279
9280         for idx, image in enumerate(self.src_images):
9281           if not image:
9282             continue
9283
9284           # FIXME: pass debug option from opcode to backend
9285           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9286                                              constants.IEIO_FILE, (image, ),
9287                                              constants.IEIO_SCRIPT,
9288                                              (iobj.disks[idx], idx),
9289                                              None)
9290           transfers.append(dt)
9291
9292         import_result = \
9293           masterd.instance.TransferInstanceData(self, feedback_fn,
9294                                                 self.op.src_node, pnode_name,
9295                                                 self.pnode.secondary_ip,
9296                                                 iobj, transfers)
9297         if not compat.all(import_result):
9298           self.LogWarning("Some disks for instance %s on node %s were not"
9299                           " imported successfully" % (instance, pnode_name))
9300
9301       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9302         feedback_fn("* preparing remote import...")
9303         # The source cluster will stop the instance before attempting to make a
9304         # connection. In some cases stopping an instance can take a long time,
9305         # hence the shutdown timeout is added to the connection timeout.
9306         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9307                            self.op.source_shutdown_timeout)
9308         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9309
9310         assert iobj.primary_node == self.pnode.name
9311         disk_results = \
9312           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9313                                         self.source_x509_ca,
9314                                         self._cds, timeouts)
9315         if not compat.all(disk_results):
9316           # TODO: Should the instance still be started, even if some disks
9317           # failed to import (valid for local imports, too)?
9318           self.LogWarning("Some disks for instance %s on node %s were not"
9319                           " imported successfully" % (instance, pnode_name))
9320
9321         # Run rename script on newly imported instance
9322         assert iobj.name == instance
9323         feedback_fn("Running rename script for %s" % instance)
9324         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9325                                                    self.source_instance_name,
9326                                                    self.op.debug_level)
9327         if result.fail_msg:
9328           self.LogWarning("Failed to run rename script for %s on node"
9329                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9330
9331       else:
9332         # also checked in the prereq part
9333         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9334                                      % self.op.mode)
9335
9336     if self.op.start:
9337       iobj.admin_up = True
9338       self.cfg.Update(iobj, feedback_fn)
9339       logging.info("Starting instance %s on node %s", instance, pnode_name)
9340       feedback_fn("* starting instance...")
9341       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9342                                             False)
9343       result.Raise("Could not start instance")
9344
9345     return list(iobj.all_nodes)
9346
9347
9348 class LUInstanceConsole(NoHooksLU):
9349   """Connect to an instance's console.
9350
9351   This is somewhat special in that it returns the command line that
9352   you need to run on the master node in order to connect to the
9353   console.
9354
9355   """
9356   REQ_BGL = False
9357
9358   def ExpandNames(self):
9359     self.share_locks = _ShareAll()
9360     self._ExpandAndLockInstance()
9361
9362   def CheckPrereq(self):
9363     """Check prerequisites.
9364
9365     This checks that the instance is in the cluster.
9366
9367     """
9368     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9369     assert self.instance is not None, \
9370       "Cannot retrieve locked instance %s" % self.op.instance_name
9371     _CheckNodeOnline(self, self.instance.primary_node)
9372
9373   def Exec(self, feedback_fn):
9374     """Connect to the console of an instance
9375
9376     """
9377     instance = self.instance
9378     node = instance.primary_node
9379
9380     node_insts = self.rpc.call_instance_list([node],
9381                                              [instance.hypervisor])[node]
9382     node_insts.Raise("Can't get node information from %s" % node)
9383
9384     if instance.name not in node_insts.payload:
9385       if instance.admin_up:
9386         state = constants.INSTST_ERRORDOWN
9387       else:
9388         state = constants.INSTST_ADMINDOWN
9389       raise errors.OpExecError("Instance %s is not running (state %s)" %
9390                                (instance.name, state))
9391
9392     logging.debug("Connecting to console of %s on %s", instance.name, node)
9393
9394     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9395
9396
9397 def _GetInstanceConsole(cluster, instance):
9398   """Returns console information for an instance.
9399
9400   @type cluster: L{objects.Cluster}
9401   @type instance: L{objects.Instance}
9402   @rtype: dict
9403
9404   """
9405   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9406   # beparams and hvparams are passed separately, to avoid editing the
9407   # instance and then saving the defaults in the instance itself.
9408   hvparams = cluster.FillHV(instance)
9409   beparams = cluster.FillBE(instance)
9410   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9411
9412   assert console.instance == instance.name
9413   assert console.Validate()
9414
9415   return console.ToDict()
9416
9417
9418 class LUInstanceReplaceDisks(LogicalUnit):
9419   """Replace the disks of an instance.
9420
9421   """
9422   HPATH = "mirrors-replace"
9423   HTYPE = constants.HTYPE_INSTANCE
9424   REQ_BGL = False
9425
9426   def CheckArguments(self):
9427     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9428                                   self.op.iallocator)
9429
9430   def ExpandNames(self):
9431     self._ExpandAndLockInstance()
9432
9433     assert locking.LEVEL_NODE not in self.needed_locks
9434     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9435
9436     assert self.op.iallocator is None or self.op.remote_node is None, \
9437       "Conflicting options"
9438
9439     if self.op.remote_node is not None:
9440       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9441
9442       # Warning: do not remove the locking of the new secondary here
9443       # unless DRBD8.AddChildren is changed to work in parallel;
9444       # currently it doesn't since parallel invocations of
9445       # FindUnusedMinor will conflict
9446       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9447       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9448     else:
9449       self.needed_locks[locking.LEVEL_NODE] = []
9450       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9451
9452       if self.op.iallocator is not None:
9453         # iallocator will select a new node in the same group
9454         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9455
9456     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9457                                    self.op.iallocator, self.op.remote_node,
9458                                    self.op.disks, False, self.op.early_release)
9459
9460     self.tasklets = [self.replacer]
9461
9462   def DeclareLocks(self, level):
9463     if level == locking.LEVEL_NODEGROUP:
9464       assert self.op.remote_node is None
9465       assert self.op.iallocator is not None
9466       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9467
9468       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9469       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9470         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9471
9472     elif level == locking.LEVEL_NODE:
9473       if self.op.iallocator is not None:
9474         assert self.op.remote_node is None
9475         assert not self.needed_locks[locking.LEVEL_NODE]
9476
9477         # Lock member nodes of all locked groups
9478         self.needed_locks[locking.LEVEL_NODE] = [node_name
9479           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9480           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9481       else:
9482         self._LockInstancesNodes()
9483
9484   def BuildHooksEnv(self):
9485     """Build hooks env.
9486
9487     This runs on the master, the primary and all the secondaries.
9488
9489     """
9490     instance = self.replacer.instance
9491     env = {
9492       "MODE": self.op.mode,
9493       "NEW_SECONDARY": self.op.remote_node,
9494       "OLD_SECONDARY": instance.secondary_nodes[0],
9495       }
9496     env.update(_BuildInstanceHookEnvByObject(self, instance))
9497     return env
9498
9499   def BuildHooksNodes(self):
9500     """Build hooks nodes.
9501
9502     """
9503     instance = self.replacer.instance
9504     nl = [
9505       self.cfg.GetMasterNode(),
9506       instance.primary_node,
9507       ]
9508     if self.op.remote_node is not None:
9509       nl.append(self.op.remote_node)
9510     return nl, nl
9511
9512   def CheckPrereq(self):
9513     """Check prerequisites.
9514
9515     """
9516     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9517             self.op.iallocator is None)
9518
9519     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9520     if owned_groups:
9521       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9522
9523     return LogicalUnit.CheckPrereq(self)
9524
9525
9526 class TLReplaceDisks(Tasklet):
9527   """Replaces disks for an instance.
9528
9529   Note: Locking is not within the scope of this class.
9530
9531   """
9532   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9533                disks, delay_iallocator, early_release):
9534     """Initializes this class.
9535
9536     """
9537     Tasklet.__init__(self, lu)
9538
9539     # Parameters
9540     self.instance_name = instance_name
9541     self.mode = mode
9542     self.iallocator_name = iallocator_name
9543     self.remote_node = remote_node
9544     self.disks = disks
9545     self.delay_iallocator = delay_iallocator
9546     self.early_release = early_release
9547
9548     # Runtime data
9549     self.instance = None
9550     self.new_node = None
9551     self.target_node = None
9552     self.other_node = None
9553     self.remote_node_info = None
9554     self.node_secondary_ip = None
9555
9556   @staticmethod
9557   def CheckArguments(mode, remote_node, iallocator):
9558     """Helper function for users of this class.
9559
9560     """
9561     # check for valid parameter combination
9562     if mode == constants.REPLACE_DISK_CHG:
9563       if remote_node is None and iallocator is None:
9564         raise errors.OpPrereqError("When changing the secondary either an"
9565                                    " iallocator script must be used or the"
9566                                    " new node given", errors.ECODE_INVAL)
9567
9568       if remote_node is not None and iallocator is not None:
9569         raise errors.OpPrereqError("Give either the iallocator or the new"
9570                                    " secondary, not both", errors.ECODE_INVAL)
9571
9572     elif remote_node is not None or iallocator is not None:
9573       # Not replacing the secondary
9574       raise errors.OpPrereqError("The iallocator and new node options can"
9575                                  " only be used when changing the"
9576                                  " secondary node", errors.ECODE_INVAL)
9577
9578   @staticmethod
9579   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9580     """Compute a new secondary node using an IAllocator.
9581
9582     """
9583     ial = IAllocator(lu.cfg, lu.rpc,
9584                      mode=constants.IALLOCATOR_MODE_RELOC,
9585                      name=instance_name,
9586                      relocate_from=list(relocate_from))
9587
9588     ial.Run(iallocator_name)
9589
9590     if not ial.success:
9591       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9592                                  " %s" % (iallocator_name, ial.info),
9593                                  errors.ECODE_NORES)
9594
9595     if len(ial.result) != ial.required_nodes:
9596       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9597                                  " of nodes (%s), required %s" %
9598                                  (iallocator_name,
9599                                   len(ial.result), ial.required_nodes),
9600                                  errors.ECODE_FAULT)
9601
9602     remote_node_name = ial.result[0]
9603
9604     lu.LogInfo("Selected new secondary for instance '%s': %s",
9605                instance_name, remote_node_name)
9606
9607     return remote_node_name
9608
9609   def _FindFaultyDisks(self, node_name):
9610     """Wrapper for L{_FindFaultyInstanceDisks}.
9611
9612     """
9613     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9614                                     node_name, True)
9615
9616   def _CheckDisksActivated(self, instance):
9617     """Checks if the instance disks are activated.
9618
9619     @param instance: The instance to check disks
9620     @return: True if they are activated, False otherwise
9621
9622     """
9623     nodes = instance.all_nodes
9624
9625     for idx, dev in enumerate(instance.disks):
9626       for node in nodes:
9627         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9628         self.cfg.SetDiskID(dev, node)
9629
9630         result = self.rpc.call_blockdev_find(node, dev)
9631
9632         if result.offline:
9633           continue
9634         elif result.fail_msg or not result.payload:
9635           return False
9636
9637     return True
9638
9639   def CheckPrereq(self):
9640     """Check prerequisites.
9641
9642     This checks that the instance is in the cluster.
9643
9644     """
9645     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9646     assert instance is not None, \
9647       "Cannot retrieve locked instance %s" % self.instance_name
9648
9649     if instance.disk_template != constants.DT_DRBD8:
9650       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9651                                  " instances", errors.ECODE_INVAL)
9652
9653     if len(instance.secondary_nodes) != 1:
9654       raise errors.OpPrereqError("The instance has a strange layout,"
9655                                  " expected one secondary but found %d" %
9656                                  len(instance.secondary_nodes),
9657                                  errors.ECODE_FAULT)
9658
9659     if not self.delay_iallocator:
9660       self._CheckPrereq2()
9661
9662   def _CheckPrereq2(self):
9663     """Check prerequisites, second part.
9664
9665     This function should always be part of CheckPrereq. It was separated and is
9666     now called from Exec because during node evacuation iallocator was only
9667     called with an unmodified cluster model, not taking planned changes into
9668     account.
9669
9670     """
9671     instance = self.instance
9672     secondary_node = instance.secondary_nodes[0]
9673
9674     if self.iallocator_name is None:
9675       remote_node = self.remote_node
9676     else:
9677       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9678                                        instance.name, instance.secondary_nodes)
9679
9680     if remote_node is None:
9681       self.remote_node_info = None
9682     else:
9683       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9684              "Remote node '%s' is not locked" % remote_node
9685
9686       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9687       assert self.remote_node_info is not None, \
9688         "Cannot retrieve locked node %s" % remote_node
9689
9690     if remote_node == self.instance.primary_node:
9691       raise errors.OpPrereqError("The specified node is the primary node of"
9692                                  " the instance", errors.ECODE_INVAL)
9693
9694     if remote_node == secondary_node:
9695       raise errors.OpPrereqError("The specified node is already the"
9696                                  " secondary node of the instance",
9697                                  errors.ECODE_INVAL)
9698
9699     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9700                                     constants.REPLACE_DISK_CHG):
9701       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9702                                  errors.ECODE_INVAL)
9703
9704     if self.mode == constants.REPLACE_DISK_AUTO:
9705       if not self._CheckDisksActivated(instance):
9706         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9707                                    " first" % self.instance_name,
9708                                    errors.ECODE_STATE)
9709       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9710       faulty_secondary = self._FindFaultyDisks(secondary_node)
9711
9712       if faulty_primary and faulty_secondary:
9713         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9714                                    " one node and can not be repaired"
9715                                    " automatically" % self.instance_name,
9716                                    errors.ECODE_STATE)
9717
9718       if faulty_primary:
9719         self.disks = faulty_primary
9720         self.target_node = instance.primary_node
9721         self.other_node = secondary_node
9722         check_nodes = [self.target_node, self.other_node]
9723       elif faulty_secondary:
9724         self.disks = faulty_secondary
9725         self.target_node = secondary_node
9726         self.other_node = instance.primary_node
9727         check_nodes = [self.target_node, self.other_node]
9728       else:
9729         self.disks = []
9730         check_nodes = []
9731
9732     else:
9733       # Non-automatic modes
9734       if self.mode == constants.REPLACE_DISK_PRI:
9735         self.target_node = instance.primary_node
9736         self.other_node = secondary_node
9737         check_nodes = [self.target_node, self.other_node]
9738
9739       elif self.mode == constants.REPLACE_DISK_SEC:
9740         self.target_node = secondary_node
9741         self.other_node = instance.primary_node
9742         check_nodes = [self.target_node, self.other_node]
9743
9744       elif self.mode == constants.REPLACE_DISK_CHG:
9745         self.new_node = remote_node
9746         self.other_node = instance.primary_node
9747         self.target_node = secondary_node
9748         check_nodes = [self.new_node, self.other_node]
9749
9750         _CheckNodeNotDrained(self.lu, remote_node)
9751         _CheckNodeVmCapable(self.lu, remote_node)
9752
9753         old_node_info = self.cfg.GetNodeInfo(secondary_node)
9754         assert old_node_info is not None
9755         if old_node_info.offline and not self.early_release:
9756           # doesn't make sense to delay the release
9757           self.early_release = True
9758           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9759                           " early-release mode", secondary_node)
9760
9761       else:
9762         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9763                                      self.mode)
9764
9765       # If not specified all disks should be replaced
9766       if not self.disks:
9767         self.disks = range(len(self.instance.disks))
9768
9769     for node in check_nodes:
9770       _CheckNodeOnline(self.lu, node)
9771
9772     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9773                                                           self.other_node,
9774                                                           self.target_node]
9775                               if node_name is not None)
9776
9777     # Release unneeded node locks
9778     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9779
9780     # Release any owned node group
9781     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9782       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9783
9784     # Check whether disks are valid
9785     for disk_idx in self.disks:
9786       instance.FindDisk(disk_idx)
9787
9788     # Get secondary node IP addresses
9789     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9790                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
9791
9792   def Exec(self, feedback_fn):
9793     """Execute disk replacement.
9794
9795     This dispatches the disk replacement to the appropriate handler.
9796
9797     """
9798     if self.delay_iallocator:
9799       self._CheckPrereq2()
9800
9801     if __debug__:
9802       # Verify owned locks before starting operation
9803       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9804       assert set(owned_nodes) == set(self.node_secondary_ip), \
9805           ("Incorrect node locks, owning %s, expected %s" %
9806            (owned_nodes, self.node_secondary_ip.keys()))
9807
9808       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9809       assert list(owned_instances) == [self.instance_name], \
9810           "Instance '%s' not locked" % self.instance_name
9811
9812       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9813           "Should not own any node group lock at this point"
9814
9815     if not self.disks:
9816       feedback_fn("No disks need replacement")
9817       return
9818
9819     feedback_fn("Replacing disk(s) %s for %s" %
9820                 (utils.CommaJoin(self.disks), self.instance.name))
9821
9822     activate_disks = (not self.instance.admin_up)
9823
9824     # Activate the instance disks if we're replacing them on a down instance
9825     if activate_disks:
9826       _StartInstanceDisks(self.lu, self.instance, True)
9827
9828     try:
9829       # Should we replace the secondary node?
9830       if self.new_node is not None:
9831         fn = self._ExecDrbd8Secondary
9832       else:
9833         fn = self._ExecDrbd8DiskOnly
9834
9835       result = fn(feedback_fn)
9836     finally:
9837       # Deactivate the instance disks if we're replacing them on a
9838       # down instance
9839       if activate_disks:
9840         _SafeShutdownInstanceDisks(self.lu, self.instance)
9841
9842     if __debug__:
9843       # Verify owned locks
9844       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9845       nodes = frozenset(self.node_secondary_ip)
9846       assert ((self.early_release and not owned_nodes) or
9847               (not self.early_release and not (set(owned_nodes) - nodes))), \
9848         ("Not owning the correct locks, early_release=%s, owned=%r,"
9849          " nodes=%r" % (self.early_release, owned_nodes, nodes))
9850
9851     return result
9852
9853   def _CheckVolumeGroup(self, nodes):
9854     self.lu.LogInfo("Checking volume groups")
9855
9856     vgname = self.cfg.GetVGName()
9857
9858     # Make sure volume group exists on all involved nodes
9859     results = self.rpc.call_vg_list(nodes)
9860     if not results:
9861       raise errors.OpExecError("Can't list volume groups on the nodes")
9862
9863     for node in nodes:
9864       res = results[node]
9865       res.Raise("Error checking node %s" % node)
9866       if vgname not in res.payload:
9867         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9868                                  (vgname, node))
9869
9870   def _CheckDisksExistence(self, nodes):
9871     # Check disk existence
9872     for idx, dev in enumerate(self.instance.disks):
9873       if idx not in self.disks:
9874         continue
9875
9876       for node in nodes:
9877         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9878         self.cfg.SetDiskID(dev, node)
9879
9880         result = self.rpc.call_blockdev_find(node, dev)
9881
9882         msg = result.fail_msg
9883         if msg or not result.payload:
9884           if not msg:
9885             msg = "disk not found"
9886           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9887                                    (idx, node, msg))
9888
9889   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9890     for idx, dev in enumerate(self.instance.disks):
9891       if idx not in self.disks:
9892         continue
9893
9894       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9895                       (idx, node_name))
9896
9897       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9898                                    ldisk=ldisk):
9899         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9900                                  " replace disks for instance %s" %
9901                                  (node_name, self.instance.name))
9902
9903   def _CreateNewStorage(self, node_name):
9904     """Create new storage on the primary or secondary node.
9905
9906     This is only used for same-node replaces, not for changing the
9907     secondary node, hence we don't want to modify the existing disk.
9908
9909     """
9910     iv_names = {}
9911
9912     for idx, dev in enumerate(self.instance.disks):
9913       if idx not in self.disks:
9914         continue
9915
9916       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9917
9918       self.cfg.SetDiskID(dev, node_name)
9919
9920       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9921       names = _GenerateUniqueNames(self.lu, lv_names)
9922
9923       vg_data = dev.children[0].logical_id[0]
9924       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9925                              logical_id=(vg_data, names[0]))
9926       vg_meta = dev.children[1].logical_id[0]
9927       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9928                              logical_id=(vg_meta, names[1]))
9929
9930       new_lvs = [lv_data, lv_meta]
9931       old_lvs = [child.Copy() for child in dev.children]
9932       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9933
9934       # we pass force_create=True to force the LVM creation
9935       for new_lv in new_lvs:
9936         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9937                         _GetInstanceInfoText(self.instance), False)
9938
9939     return iv_names
9940
9941   def _CheckDevices(self, node_name, iv_names):
9942     for name, (dev, _, _) in iv_names.iteritems():
9943       self.cfg.SetDiskID(dev, node_name)
9944
9945       result = self.rpc.call_blockdev_find(node_name, dev)
9946
9947       msg = result.fail_msg
9948       if msg or not result.payload:
9949         if not msg:
9950           msg = "disk not found"
9951         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9952                                  (name, msg))
9953
9954       if result.payload.is_degraded:
9955         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9956
9957   def _RemoveOldStorage(self, node_name, iv_names):
9958     for name, (_, old_lvs, _) in iv_names.iteritems():
9959       self.lu.LogInfo("Remove logical volumes for %s" % name)
9960
9961       for lv in old_lvs:
9962         self.cfg.SetDiskID(lv, node_name)
9963
9964         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9965         if msg:
9966           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9967                              hint="remove unused LVs manually")
9968
9969   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9970     """Replace a disk on the primary or secondary for DRBD 8.
9971
9972     The algorithm for replace is quite complicated:
9973
9974       1. for each disk to be replaced:
9975
9976         1. create new LVs on the target node with unique names
9977         1. detach old LVs from the drbd device
9978         1. rename old LVs to name_replaced.<time_t>
9979         1. rename new LVs to old LVs
9980         1. attach the new LVs (with the old names now) to the drbd device
9981
9982       1. wait for sync across all devices
9983
9984       1. for each modified disk:
9985
9986         1. remove old LVs (which have the name name_replaces.<time_t>)
9987
9988     Failures are not very well handled.
9989
9990     """
9991     steps_total = 6
9992
9993     # Step: check device activation
9994     self.lu.LogStep(1, steps_total, "Check device existence")
9995     self._CheckDisksExistence([self.other_node, self.target_node])
9996     self._CheckVolumeGroup([self.target_node, self.other_node])
9997
9998     # Step: check other node consistency
9999     self.lu.LogStep(2, steps_total, "Check peer consistency")
10000     self._CheckDisksConsistency(self.other_node,
10001                                 self.other_node == self.instance.primary_node,
10002                                 False)
10003
10004     # Step: create new storage
10005     self.lu.LogStep(3, steps_total, "Allocate new storage")
10006     iv_names = self._CreateNewStorage(self.target_node)
10007
10008     # Step: for each lv, detach+rename*2+attach
10009     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10010     for dev, old_lvs, new_lvs in iv_names.itervalues():
10011       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10012
10013       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10014                                                      old_lvs)
10015       result.Raise("Can't detach drbd from local storage on node"
10016                    " %s for device %s" % (self.target_node, dev.iv_name))
10017       #dev.children = []
10018       #cfg.Update(instance)
10019
10020       # ok, we created the new LVs, so now we know we have the needed
10021       # storage; as such, we proceed on the target node to rename
10022       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10023       # using the assumption that logical_id == physical_id (which in
10024       # turn is the unique_id on that node)
10025
10026       # FIXME(iustin): use a better name for the replaced LVs
10027       temp_suffix = int(time.time())
10028       ren_fn = lambda d, suff: (d.physical_id[0],
10029                                 d.physical_id[1] + "_replaced-%s" % suff)
10030
10031       # Build the rename list based on what LVs exist on the node
10032       rename_old_to_new = []
10033       for to_ren in old_lvs:
10034         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10035         if not result.fail_msg and result.payload:
10036           # device exists
10037           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10038
10039       self.lu.LogInfo("Renaming the old LVs on the target node")
10040       result = self.rpc.call_blockdev_rename(self.target_node,
10041                                              rename_old_to_new)
10042       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10043
10044       # Now we rename the new LVs to the old LVs
10045       self.lu.LogInfo("Renaming the new LVs on the target node")
10046       rename_new_to_old = [(new, old.physical_id)
10047                            for old, new in zip(old_lvs, new_lvs)]
10048       result = self.rpc.call_blockdev_rename(self.target_node,
10049                                              rename_new_to_old)
10050       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10051
10052       # Intermediate steps of in memory modifications
10053       for old, new in zip(old_lvs, new_lvs):
10054         new.logical_id = old.logical_id
10055         self.cfg.SetDiskID(new, self.target_node)
10056
10057       # We need to modify old_lvs so that removal later removes the
10058       # right LVs, not the newly added ones; note that old_lvs is a
10059       # copy here
10060       for disk in old_lvs:
10061         disk.logical_id = ren_fn(disk, temp_suffix)
10062         self.cfg.SetDiskID(disk, self.target_node)
10063
10064       # Now that the new lvs have the old name, we can add them to the device
10065       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10066       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10067                                                   new_lvs)
10068       msg = result.fail_msg
10069       if msg:
10070         for new_lv in new_lvs:
10071           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10072                                                new_lv).fail_msg
10073           if msg2:
10074             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10075                                hint=("cleanup manually the unused logical"
10076                                      "volumes"))
10077         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10078
10079     cstep = 5
10080     if self.early_release:
10081       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10082       cstep += 1
10083       self._RemoveOldStorage(self.target_node, iv_names)
10084       # WARNING: we release both node locks here, do not do other RPCs
10085       # than WaitForSync to the primary node
10086       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10087                     names=[self.target_node, self.other_node])
10088
10089     # Wait for sync
10090     # This can fail as the old devices are degraded and _WaitForSync
10091     # does a combined result over all disks, so we don't check its return value
10092     self.lu.LogStep(cstep, steps_total, "Sync devices")
10093     cstep += 1
10094     _WaitForSync(self.lu, self.instance)
10095
10096     # Check all devices manually
10097     self._CheckDevices(self.instance.primary_node, iv_names)
10098
10099     # Step: remove old storage
10100     if not self.early_release:
10101       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10102       cstep += 1
10103       self._RemoveOldStorage(self.target_node, iv_names)
10104
10105   def _ExecDrbd8Secondary(self, feedback_fn):
10106     """Replace the secondary node for DRBD 8.
10107
10108     The algorithm for replace is quite complicated:
10109       - for all disks of the instance:
10110         - create new LVs on the new node with same names
10111         - shutdown the drbd device on the old secondary
10112         - disconnect the drbd network on the primary
10113         - create the drbd device on the new secondary
10114         - network attach the drbd on the primary, using an artifice:
10115           the drbd code for Attach() will connect to the network if it
10116           finds a device which is connected to the good local disks but
10117           not network enabled
10118       - wait for sync across all devices
10119       - remove all disks from the old secondary
10120
10121     Failures are not very well handled.
10122
10123     """
10124     steps_total = 6
10125
10126     pnode = self.instance.primary_node
10127
10128     # Step: check device activation
10129     self.lu.LogStep(1, steps_total, "Check device existence")
10130     self._CheckDisksExistence([self.instance.primary_node])
10131     self._CheckVolumeGroup([self.instance.primary_node])
10132
10133     # Step: check other node consistency
10134     self.lu.LogStep(2, steps_total, "Check peer consistency")
10135     self._CheckDisksConsistency(self.instance.primary_node, True, True)
10136
10137     # Step: create new storage
10138     self.lu.LogStep(3, steps_total, "Allocate new storage")
10139     for idx, dev in enumerate(self.instance.disks):
10140       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10141                       (self.new_node, idx))
10142       # we pass force_create=True to force LVM creation
10143       for new_lv in dev.children:
10144         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10145                         _GetInstanceInfoText(self.instance), False)
10146
10147     # Step 4: dbrd minors and drbd setups changes
10148     # after this, we must manually remove the drbd minors on both the
10149     # error and the success paths
10150     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10151     minors = self.cfg.AllocateDRBDMinor([self.new_node
10152                                          for dev in self.instance.disks],
10153                                         self.instance.name)
10154     logging.debug("Allocated minors %r", minors)
10155
10156     iv_names = {}
10157     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10158       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10159                       (self.new_node, idx))
10160       # create new devices on new_node; note that we create two IDs:
10161       # one without port, so the drbd will be activated without
10162       # networking information on the new node at this stage, and one
10163       # with network, for the latter activation in step 4
10164       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10165       if self.instance.primary_node == o_node1:
10166         p_minor = o_minor1
10167       else:
10168         assert self.instance.primary_node == o_node2, "Three-node instance?"
10169         p_minor = o_minor2
10170
10171       new_alone_id = (self.instance.primary_node, self.new_node, None,
10172                       p_minor, new_minor, o_secret)
10173       new_net_id = (self.instance.primary_node, self.new_node, o_port,
10174                     p_minor, new_minor, o_secret)
10175
10176       iv_names[idx] = (dev, dev.children, new_net_id)
10177       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10178                     new_net_id)
10179       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10180                               logical_id=new_alone_id,
10181                               children=dev.children,
10182                               size=dev.size)
10183       try:
10184         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10185                               _GetInstanceInfoText(self.instance), False)
10186       except errors.GenericError:
10187         self.cfg.ReleaseDRBDMinors(self.instance.name)
10188         raise
10189
10190     # We have new devices, shutdown the drbd on the old secondary
10191     for idx, dev in enumerate(self.instance.disks):
10192       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10193       self.cfg.SetDiskID(dev, self.target_node)
10194       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10195       if msg:
10196         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10197                            "node: %s" % (idx, msg),
10198                            hint=("Please cleanup this device manually as"
10199                                  " soon as possible"))
10200
10201     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10202     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10203                                                self.instance.disks)[pnode]
10204
10205     msg = result.fail_msg
10206     if msg:
10207       # detaches didn't succeed (unlikely)
10208       self.cfg.ReleaseDRBDMinors(self.instance.name)
10209       raise errors.OpExecError("Can't detach the disks from the network on"
10210                                " old node: %s" % (msg,))
10211
10212     # if we managed to detach at least one, we update all the disks of
10213     # the instance to point to the new secondary
10214     self.lu.LogInfo("Updating instance configuration")
10215     for dev, _, new_logical_id in iv_names.itervalues():
10216       dev.logical_id = new_logical_id
10217       self.cfg.SetDiskID(dev, self.instance.primary_node)
10218
10219     self.cfg.Update(self.instance, feedback_fn)
10220
10221     # and now perform the drbd attach
10222     self.lu.LogInfo("Attaching primary drbds to new secondary"
10223                     " (standalone => connected)")
10224     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10225                                             self.new_node],
10226                                            self.node_secondary_ip,
10227                                            self.instance.disks,
10228                                            self.instance.name,
10229                                            False)
10230     for to_node, to_result in result.items():
10231       msg = to_result.fail_msg
10232       if msg:
10233         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10234                            to_node, msg,
10235                            hint=("please do a gnt-instance info to see the"
10236                                  " status of disks"))
10237     cstep = 5
10238     if self.early_release:
10239       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10240       cstep += 1
10241       self._RemoveOldStorage(self.target_node, iv_names)
10242       # WARNING: we release all node locks here, do not do other RPCs
10243       # than WaitForSync to the primary node
10244       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10245                     names=[self.instance.primary_node,
10246                            self.target_node,
10247                            self.new_node])
10248
10249     # Wait for sync
10250     # This can fail as the old devices are degraded and _WaitForSync
10251     # does a combined result over all disks, so we don't check its return value
10252     self.lu.LogStep(cstep, steps_total, "Sync devices")
10253     cstep += 1
10254     _WaitForSync(self.lu, self.instance)
10255
10256     # Check all devices manually
10257     self._CheckDevices(self.instance.primary_node, iv_names)
10258
10259     # Step: remove old storage
10260     if not self.early_release:
10261       self.lu.LogStep(cstep, steps_total, "Removing old storage")
10262       self._RemoveOldStorage(self.target_node, iv_names)
10263
10264
10265 class LURepairNodeStorage(NoHooksLU):
10266   """Repairs the volume group on a node.
10267
10268   """
10269   REQ_BGL = False
10270
10271   def CheckArguments(self):
10272     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10273
10274     storage_type = self.op.storage_type
10275
10276     if (constants.SO_FIX_CONSISTENCY not in
10277         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10278       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10279                                  " repaired" % storage_type,
10280                                  errors.ECODE_INVAL)
10281
10282   def ExpandNames(self):
10283     self.needed_locks = {
10284       locking.LEVEL_NODE: [self.op.node_name],
10285       }
10286
10287   def _CheckFaultyDisks(self, instance, node_name):
10288     """Ensure faulty disks abort the opcode or at least warn."""
10289     try:
10290       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10291                                   node_name, True):
10292         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10293                                    " node '%s'" % (instance.name, node_name),
10294                                    errors.ECODE_STATE)
10295     except errors.OpPrereqError, err:
10296       if self.op.ignore_consistency:
10297         self.proc.LogWarning(str(err.args[0]))
10298       else:
10299         raise
10300
10301   def CheckPrereq(self):
10302     """Check prerequisites.
10303
10304     """
10305     # Check whether any instance on this node has faulty disks
10306     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10307       if not inst.admin_up:
10308         continue
10309       check_nodes = set(inst.all_nodes)
10310       check_nodes.discard(self.op.node_name)
10311       for inst_node_name in check_nodes:
10312         self._CheckFaultyDisks(inst, inst_node_name)
10313
10314   def Exec(self, feedback_fn):
10315     feedback_fn("Repairing storage unit '%s' on %s ..." %
10316                 (self.op.name, self.op.node_name))
10317
10318     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10319     result = self.rpc.call_storage_execute(self.op.node_name,
10320                                            self.op.storage_type, st_args,
10321                                            self.op.name,
10322                                            constants.SO_FIX_CONSISTENCY)
10323     result.Raise("Failed to repair storage unit '%s' on %s" %
10324                  (self.op.name, self.op.node_name))
10325
10326
10327 class LUNodeEvacuate(NoHooksLU):
10328   """Evacuates instances off a list of nodes.
10329
10330   """
10331   REQ_BGL = False
10332
10333   def CheckArguments(self):
10334     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10335
10336   def ExpandNames(self):
10337     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10338
10339     if self.op.remote_node is not None:
10340       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10341       assert self.op.remote_node
10342
10343       if self.op.remote_node == self.op.node_name:
10344         raise errors.OpPrereqError("Can not use evacuated node as a new"
10345                                    " secondary node", errors.ECODE_INVAL)
10346
10347       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10348         raise errors.OpPrereqError("Without the use of an iallocator only"
10349                                    " secondary instances can be evacuated",
10350                                    errors.ECODE_INVAL)
10351
10352     # Declare locks
10353     self.share_locks = _ShareAll()
10354     self.needed_locks = {
10355       locking.LEVEL_INSTANCE: [],
10356       locking.LEVEL_NODEGROUP: [],
10357       locking.LEVEL_NODE: [],
10358       }
10359
10360     if self.op.remote_node is None:
10361       # Iallocator will choose any node(s) in the same group
10362       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10363     else:
10364       group_nodes = frozenset([self.op.remote_node])
10365
10366     # Determine nodes to be locked
10367     self.lock_nodes = set([self.op.node_name]) | group_nodes
10368
10369   def _DetermineInstances(self):
10370     """Builds list of instances to operate on.
10371
10372     """
10373     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10374
10375     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10376       # Primary instances only
10377       inst_fn = _GetNodePrimaryInstances
10378       assert self.op.remote_node is None, \
10379         "Evacuating primary instances requires iallocator"
10380     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10381       # Secondary instances only
10382       inst_fn = _GetNodeSecondaryInstances
10383     else:
10384       # All instances
10385       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10386       inst_fn = _GetNodeInstances
10387
10388     return inst_fn(self.cfg, self.op.node_name)
10389
10390   def DeclareLocks(self, level):
10391     if level == locking.LEVEL_INSTANCE:
10392       # Lock instances optimistically, needs verification once node and group
10393       # locks have been acquired
10394       self.needed_locks[locking.LEVEL_INSTANCE] = \
10395         set(i.name for i in self._DetermineInstances())
10396
10397     elif level == locking.LEVEL_NODEGROUP:
10398       # Lock node groups optimistically, needs verification once nodes have
10399       # been acquired
10400       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10401         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10402
10403     elif level == locking.LEVEL_NODE:
10404       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10405
10406   def CheckPrereq(self):
10407     # Verify locks
10408     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10409     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10410     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10411
10412     assert owned_nodes == self.lock_nodes
10413
10414     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10415     if owned_groups != wanted_groups:
10416       raise errors.OpExecError("Node groups changed since locks were acquired,"
10417                                " current groups are '%s', used to be '%s'" %
10418                                (utils.CommaJoin(wanted_groups),
10419                                 utils.CommaJoin(owned_groups)))
10420
10421     # Determine affected instances
10422     self.instances = self._DetermineInstances()
10423     self.instance_names = [i.name for i in self.instances]
10424
10425     if set(self.instance_names) != owned_instances:
10426       raise errors.OpExecError("Instances on node '%s' changed since locks"
10427                                " were acquired, current instances are '%s',"
10428                                " used to be '%s'" %
10429                                (self.op.node_name,
10430                                 utils.CommaJoin(self.instance_names),
10431                                 utils.CommaJoin(owned_instances)))
10432
10433     if self.instance_names:
10434       self.LogInfo("Evacuating instances from node '%s': %s",
10435                    self.op.node_name,
10436                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10437     else:
10438       self.LogInfo("No instances to evacuate from node '%s'",
10439                    self.op.node_name)
10440
10441     if self.op.remote_node is not None:
10442       for i in self.instances:
10443         if i.primary_node == self.op.remote_node:
10444           raise errors.OpPrereqError("Node %s is the primary node of"
10445                                      " instance %s, cannot use it as"
10446                                      " secondary" %
10447                                      (self.op.remote_node, i.name),
10448                                      errors.ECODE_INVAL)
10449
10450   def Exec(self, feedback_fn):
10451     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10452
10453     if not self.instance_names:
10454       # No instances to evacuate
10455       jobs = []
10456
10457     elif self.op.iallocator is not None:
10458       # TODO: Implement relocation to other group
10459       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10460                        evac_mode=self.op.mode,
10461                        instances=list(self.instance_names))
10462
10463       ial.Run(self.op.iallocator)
10464
10465       if not ial.success:
10466         raise errors.OpPrereqError("Can't compute node evacuation using"
10467                                    " iallocator '%s': %s" %
10468                                    (self.op.iallocator, ial.info),
10469                                    errors.ECODE_NORES)
10470
10471       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10472
10473     elif self.op.remote_node is not None:
10474       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10475       jobs = [
10476         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10477                                         remote_node=self.op.remote_node,
10478                                         disks=[],
10479                                         mode=constants.REPLACE_DISK_CHG,
10480                                         early_release=self.op.early_release)]
10481         for instance_name in self.instance_names
10482         ]
10483
10484     else:
10485       raise errors.ProgrammerError("No iallocator or remote node")
10486
10487     return ResultWithJobs(jobs)
10488
10489
10490 def _SetOpEarlyRelease(early_release, op):
10491   """Sets C{early_release} flag on opcodes if available.
10492
10493   """
10494   try:
10495     op.early_release = early_release
10496   except AttributeError:
10497     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10498
10499   return op
10500
10501
10502 def _NodeEvacDest(use_nodes, group, nodes):
10503   """Returns group or nodes depending on caller's choice.
10504
10505   """
10506   if use_nodes:
10507     return utils.CommaJoin(nodes)
10508   else:
10509     return group
10510
10511
10512 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10513   """Unpacks the result of change-group and node-evacuate iallocator requests.
10514
10515   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10516   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10517
10518   @type lu: L{LogicalUnit}
10519   @param lu: Logical unit instance
10520   @type alloc_result: tuple/list
10521   @param alloc_result: Result from iallocator
10522   @type early_release: bool
10523   @param early_release: Whether to release locks early if possible
10524   @type use_nodes: bool
10525   @param use_nodes: Whether to display node names instead of groups
10526
10527   """
10528   (moved, failed, jobs) = alloc_result
10529
10530   if failed:
10531     lu.LogWarning("Unable to evacuate instances %s",
10532                   utils.CommaJoin("%s (%s)" % (name, reason)
10533                                   for (name, reason) in failed))
10534
10535   if moved:
10536     lu.LogInfo("Instances to be moved: %s",
10537                utils.CommaJoin("%s (to %s)" %
10538                                (name, _NodeEvacDest(use_nodes, group, nodes))
10539                                for (name, group, nodes) in moved))
10540
10541   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10542               map(opcodes.OpCode.LoadOpCode, ops))
10543           for ops in jobs]
10544
10545
10546 class LUInstanceGrowDisk(LogicalUnit):
10547   """Grow a disk of an instance.
10548
10549   """
10550   HPATH = "disk-grow"
10551   HTYPE = constants.HTYPE_INSTANCE
10552   REQ_BGL = False
10553
10554   def ExpandNames(self):
10555     self._ExpandAndLockInstance()
10556     self.needed_locks[locking.LEVEL_NODE] = []
10557     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10558
10559   def DeclareLocks(self, level):
10560     if level == locking.LEVEL_NODE:
10561       self._LockInstancesNodes()
10562
10563   def BuildHooksEnv(self):
10564     """Build hooks env.
10565
10566     This runs on the master, the primary and all the secondaries.
10567
10568     """
10569     env = {
10570       "DISK": self.op.disk,
10571       "AMOUNT": self.op.amount,
10572       }
10573     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10574     return env
10575
10576   def BuildHooksNodes(self):
10577     """Build hooks nodes.
10578
10579     """
10580     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10581     return (nl, nl)
10582
10583   def CheckPrereq(self):
10584     """Check prerequisites.
10585
10586     This checks that the instance is in the cluster.
10587
10588     """
10589     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10590     assert instance is not None, \
10591       "Cannot retrieve locked instance %s" % self.op.instance_name
10592     nodenames = list(instance.all_nodes)
10593     for node in nodenames:
10594       _CheckNodeOnline(self, node)
10595
10596     self.instance = instance
10597
10598     if instance.disk_template not in constants.DTS_GROWABLE:
10599       raise errors.OpPrereqError("Instance's disk layout does not support"
10600                                  " growing", errors.ECODE_INVAL)
10601
10602     self.disk = instance.FindDisk(self.op.disk)
10603
10604     if instance.disk_template not in (constants.DT_FILE,
10605                                       constants.DT_SHARED_FILE):
10606       # TODO: check the free disk space for file, when that feature will be
10607       # supported
10608       _CheckNodesFreeDiskPerVG(self, nodenames,
10609                                self.disk.ComputeGrowth(self.op.amount))
10610
10611   def Exec(self, feedback_fn):
10612     """Execute disk grow.
10613
10614     """
10615     instance = self.instance
10616     disk = self.disk
10617
10618     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10619     if not disks_ok:
10620       raise errors.OpExecError("Cannot activate block device to grow")
10621
10622     # First run all grow ops in dry-run mode
10623     for node in instance.all_nodes:
10624       self.cfg.SetDiskID(disk, node)
10625       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10626       result.Raise("Grow request failed to node %s" % node)
10627
10628     # We know that (as far as we can test) operations across different
10629     # nodes will succeed, time to run it for real
10630     for node in instance.all_nodes:
10631       self.cfg.SetDiskID(disk, node)
10632       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10633       result.Raise("Grow request failed to node %s" % node)
10634
10635       # TODO: Rewrite code to work properly
10636       # DRBD goes into sync mode for a short amount of time after executing the
10637       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10638       # calling "resize" in sync mode fails. Sleeping for a short amount of
10639       # time is a work-around.
10640       time.sleep(5)
10641
10642     disk.RecordGrow(self.op.amount)
10643     self.cfg.Update(instance, feedback_fn)
10644     if self.op.wait_for_sync:
10645       disk_abort = not _WaitForSync(self, instance, disks=[disk])
10646       if disk_abort:
10647         self.proc.LogWarning("Disk sync-ing has not returned a good"
10648                              " status; please check the instance")
10649       if not instance.admin_up:
10650         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10651     elif not instance.admin_up:
10652       self.proc.LogWarning("Not shutting down the disk even if the instance is"
10653                            " not supposed to be running because no wait for"
10654                            " sync mode was requested")
10655
10656
10657 class LUInstanceQueryData(NoHooksLU):
10658   """Query runtime instance data.
10659
10660   """
10661   REQ_BGL = False
10662
10663   def ExpandNames(self):
10664     self.needed_locks = {}
10665
10666     # Use locking if requested or when non-static information is wanted
10667     if not (self.op.static or self.op.use_locking):
10668       self.LogWarning("Non-static data requested, locks need to be acquired")
10669       self.op.use_locking = True
10670
10671     if self.op.instances or not self.op.use_locking:
10672       # Expand instance names right here
10673       self.wanted_names = _GetWantedInstances(self, self.op.instances)
10674     else:
10675       # Will use acquired locks
10676       self.wanted_names = None
10677
10678     if self.op.use_locking:
10679       self.share_locks = _ShareAll()
10680
10681       if self.wanted_names is None:
10682         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10683       else:
10684         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10685
10686       self.needed_locks[locking.LEVEL_NODE] = []
10687       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10688
10689   def DeclareLocks(self, level):
10690     if self.op.use_locking and level == locking.LEVEL_NODE:
10691       self._LockInstancesNodes()
10692
10693   def CheckPrereq(self):
10694     """Check prerequisites.
10695
10696     This only checks the optional instance list against the existing names.
10697
10698     """
10699     if self.wanted_names is None:
10700       assert self.op.use_locking, "Locking was not used"
10701       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10702
10703     self.wanted_instances = \
10704         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10705
10706   def _ComputeBlockdevStatus(self, node, instance_name, dev):
10707     """Returns the status of a block device
10708
10709     """
10710     if self.op.static or not node:
10711       return None
10712
10713     self.cfg.SetDiskID(dev, node)
10714
10715     result = self.rpc.call_blockdev_find(node, dev)
10716     if result.offline:
10717       return None
10718
10719     result.Raise("Can't compute disk status for %s" % instance_name)
10720
10721     status = result.payload
10722     if status is None:
10723       return None
10724
10725     return (status.dev_path, status.major, status.minor,
10726             status.sync_percent, status.estimated_time,
10727             status.is_degraded, status.ldisk_status)
10728
10729   def _ComputeDiskStatus(self, instance, snode, dev):
10730     """Compute block device status.
10731
10732     """
10733     if dev.dev_type in constants.LDS_DRBD:
10734       # we change the snode then (otherwise we use the one passed in)
10735       if dev.logical_id[0] == instance.primary_node:
10736         snode = dev.logical_id[1]
10737       else:
10738         snode = dev.logical_id[0]
10739
10740     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10741                                               instance.name, dev)
10742     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10743
10744     if dev.children:
10745       dev_children = map(compat.partial(self._ComputeDiskStatus,
10746                                         instance, snode),
10747                          dev.children)
10748     else:
10749       dev_children = []
10750
10751     return {
10752       "iv_name": dev.iv_name,
10753       "dev_type": dev.dev_type,
10754       "logical_id": dev.logical_id,
10755       "physical_id": dev.physical_id,
10756       "pstatus": dev_pstatus,
10757       "sstatus": dev_sstatus,
10758       "children": dev_children,
10759       "mode": dev.mode,
10760       "size": dev.size,
10761       }
10762
10763   def Exec(self, feedback_fn):
10764     """Gather and return data"""
10765     result = {}
10766
10767     cluster = self.cfg.GetClusterInfo()
10768
10769     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10770                                           for i in self.wanted_instances)
10771     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10772       if self.op.static or pnode.offline:
10773         remote_state = None
10774         if pnode.offline:
10775           self.LogWarning("Primary node %s is marked offline, returning static"
10776                           " information only for instance %s" %
10777                           (pnode.name, instance.name))
10778       else:
10779         remote_info = self.rpc.call_instance_info(instance.primary_node,
10780                                                   instance.name,
10781                                                   instance.hypervisor)
10782         remote_info.Raise("Error checking node %s" % instance.primary_node)
10783         remote_info = remote_info.payload
10784         if remote_info and "state" in remote_info:
10785           remote_state = "up"
10786         else:
10787           remote_state = "down"
10788
10789       if instance.admin_up:
10790         config_state = "up"
10791       else:
10792         config_state = "down"
10793
10794       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10795                   instance.disks)
10796
10797       result[instance.name] = {
10798         "name": instance.name,
10799         "config_state": config_state,
10800         "run_state": remote_state,
10801         "pnode": instance.primary_node,
10802         "snodes": instance.secondary_nodes,
10803         "os": instance.os,
10804         # this happens to be the same format used for hooks
10805         "nics": _NICListToTuple(self, instance.nics),
10806         "disk_template": instance.disk_template,
10807         "disks": disks,
10808         "hypervisor": instance.hypervisor,
10809         "network_port": instance.network_port,
10810         "hv_instance": instance.hvparams,
10811         "hv_actual": cluster.FillHV(instance, skip_globals=True),
10812         "be_instance": instance.beparams,
10813         "be_actual": cluster.FillBE(instance),
10814         "os_instance": instance.osparams,
10815         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10816         "serial_no": instance.serial_no,
10817         "mtime": instance.mtime,
10818         "ctime": instance.ctime,
10819         "uuid": instance.uuid,
10820         }
10821
10822     return result
10823
10824
10825 class LUInstanceSetParams(LogicalUnit):
10826   """Modifies an instances's parameters.
10827
10828   """
10829   HPATH = "instance-modify"
10830   HTYPE = constants.HTYPE_INSTANCE
10831   REQ_BGL = False
10832
10833   def CheckArguments(self):
10834     if not (self.op.nics or self.op.disks or self.op.disk_template or
10835             self.op.hvparams or self.op.beparams or self.op.os_name):
10836       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10837
10838     if self.op.hvparams:
10839       _CheckGlobalHvParams(self.op.hvparams)
10840
10841     # Disk validation
10842     disk_addremove = 0
10843     for disk_op, disk_dict in self.op.disks:
10844       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10845       if disk_op == constants.DDM_REMOVE:
10846         disk_addremove += 1
10847         continue
10848       elif disk_op == constants.DDM_ADD:
10849         disk_addremove += 1
10850       else:
10851         if not isinstance(disk_op, int):
10852           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10853         if not isinstance(disk_dict, dict):
10854           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10855           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10856
10857       if disk_op == constants.DDM_ADD:
10858         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10859         if mode not in constants.DISK_ACCESS_SET:
10860           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10861                                      errors.ECODE_INVAL)
10862         size = disk_dict.get(constants.IDISK_SIZE, None)
10863         if size is None:
10864           raise errors.OpPrereqError("Required disk parameter size missing",
10865                                      errors.ECODE_INVAL)
10866         try:
10867           size = int(size)
10868         except (TypeError, ValueError), err:
10869           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10870                                      str(err), errors.ECODE_INVAL)
10871         disk_dict[constants.IDISK_SIZE] = size
10872       else:
10873         # modification of disk
10874         if constants.IDISK_SIZE in disk_dict:
10875           raise errors.OpPrereqError("Disk size change not possible, use"
10876                                      " grow-disk", errors.ECODE_INVAL)
10877
10878     if disk_addremove > 1:
10879       raise errors.OpPrereqError("Only one disk add or remove operation"
10880                                  " supported at a time", errors.ECODE_INVAL)
10881
10882     if self.op.disks and self.op.disk_template is not None:
10883       raise errors.OpPrereqError("Disk template conversion and other disk"
10884                                  " changes not supported at the same time",
10885                                  errors.ECODE_INVAL)
10886
10887     if (self.op.disk_template and
10888         self.op.disk_template in constants.DTS_INT_MIRROR and
10889         self.op.remote_node is None):
10890       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10891                                  " one requires specifying a secondary node",
10892                                  errors.ECODE_INVAL)
10893
10894     # NIC validation
10895     nic_addremove = 0
10896     for nic_op, nic_dict in self.op.nics:
10897       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10898       if nic_op == constants.DDM_REMOVE:
10899         nic_addremove += 1
10900         continue
10901       elif nic_op == constants.DDM_ADD:
10902         nic_addremove += 1
10903       else:
10904         if not isinstance(nic_op, int):
10905           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10906         if not isinstance(nic_dict, dict):
10907           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10908           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10909
10910       # nic_dict should be a dict
10911       nic_ip = nic_dict.get(constants.INIC_IP, None)
10912       if nic_ip is not None:
10913         if nic_ip.lower() == constants.VALUE_NONE:
10914           nic_dict[constants.INIC_IP] = None
10915         else:
10916           if not netutils.IPAddress.IsValid(nic_ip):
10917             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10918                                        errors.ECODE_INVAL)
10919
10920       nic_bridge = nic_dict.get("bridge", None)
10921       nic_link = nic_dict.get(constants.INIC_LINK, None)
10922       if nic_bridge and nic_link:
10923         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10924                                    " at the same time", errors.ECODE_INVAL)
10925       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10926         nic_dict["bridge"] = None
10927       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10928         nic_dict[constants.INIC_LINK] = None
10929
10930       if nic_op == constants.DDM_ADD:
10931         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10932         if nic_mac is None:
10933           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10934
10935       if constants.INIC_MAC in nic_dict:
10936         nic_mac = nic_dict[constants.INIC_MAC]
10937         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10938           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10939
10940         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10941           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10942                                      " modifying an existing nic",
10943                                      errors.ECODE_INVAL)
10944
10945     if nic_addremove > 1:
10946       raise errors.OpPrereqError("Only one NIC add or remove operation"
10947                                  " supported at a time", errors.ECODE_INVAL)
10948
10949   def ExpandNames(self):
10950     self._ExpandAndLockInstance()
10951     self.needed_locks[locking.LEVEL_NODE] = []
10952     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10953
10954   def DeclareLocks(self, level):
10955     if level == locking.LEVEL_NODE:
10956       self._LockInstancesNodes()
10957       if self.op.disk_template and self.op.remote_node:
10958         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10959         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10960
10961   def BuildHooksEnv(self):
10962     """Build hooks env.
10963
10964     This runs on the master, primary and secondaries.
10965
10966     """
10967     args = dict()
10968     if constants.BE_MEMORY in self.be_new:
10969       args["memory"] = self.be_new[constants.BE_MEMORY]
10970     if constants.BE_VCPUS in self.be_new:
10971       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10972     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10973     # information at all.
10974     if self.op.nics:
10975       args["nics"] = []
10976       nic_override = dict(self.op.nics)
10977       for idx, nic in enumerate(self.instance.nics):
10978         if idx in nic_override:
10979           this_nic_override = nic_override[idx]
10980         else:
10981           this_nic_override = {}
10982         if constants.INIC_IP in this_nic_override:
10983           ip = this_nic_override[constants.INIC_IP]
10984         else:
10985           ip = nic.ip
10986         if constants.INIC_MAC in this_nic_override:
10987           mac = this_nic_override[constants.INIC_MAC]
10988         else:
10989           mac = nic.mac
10990         if idx in self.nic_pnew:
10991           nicparams = self.nic_pnew[idx]
10992         else:
10993           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10994         mode = nicparams[constants.NIC_MODE]
10995         link = nicparams[constants.NIC_LINK]
10996         args["nics"].append((ip, mac, mode, link))
10997       if constants.DDM_ADD in nic_override:
10998         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10999         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11000         nicparams = self.nic_pnew[constants.DDM_ADD]
11001         mode = nicparams[constants.NIC_MODE]
11002         link = nicparams[constants.NIC_LINK]
11003         args["nics"].append((ip, mac, mode, link))
11004       elif constants.DDM_REMOVE in nic_override:
11005         del args["nics"][-1]
11006
11007     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11008     if self.op.disk_template:
11009       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11010
11011     return env
11012
11013   def BuildHooksNodes(self):
11014     """Build hooks nodes.
11015
11016     """
11017     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11018     return (nl, nl)
11019
11020   def CheckPrereq(self):
11021     """Check prerequisites.
11022
11023     This only checks the instance list against the existing names.
11024
11025     """
11026     # checking the new params on the primary/secondary nodes
11027
11028     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11029     cluster = self.cluster = self.cfg.GetClusterInfo()
11030     assert self.instance is not None, \
11031       "Cannot retrieve locked instance %s" % self.op.instance_name
11032     pnode = instance.primary_node
11033     nodelist = list(instance.all_nodes)
11034
11035     # OS change
11036     if self.op.os_name and not self.op.force:
11037       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11038                       self.op.force_variant)
11039       instance_os = self.op.os_name
11040     else:
11041       instance_os = instance.os
11042
11043     if self.op.disk_template:
11044       if instance.disk_template == self.op.disk_template:
11045         raise errors.OpPrereqError("Instance already has disk template %s" %
11046                                    instance.disk_template, errors.ECODE_INVAL)
11047
11048       if (instance.disk_template,
11049           self.op.disk_template) not in self._DISK_CONVERSIONS:
11050         raise errors.OpPrereqError("Unsupported disk template conversion from"
11051                                    " %s to %s" % (instance.disk_template,
11052                                                   self.op.disk_template),
11053                                    errors.ECODE_INVAL)
11054       _CheckInstanceDown(self, instance, "cannot change disk template")
11055       if self.op.disk_template in constants.DTS_INT_MIRROR:
11056         if self.op.remote_node == pnode:
11057           raise errors.OpPrereqError("Given new secondary node %s is the same"
11058                                      " as the primary node of the instance" %
11059                                      self.op.remote_node, errors.ECODE_STATE)
11060         _CheckNodeOnline(self, self.op.remote_node)
11061         _CheckNodeNotDrained(self, self.op.remote_node)
11062         # FIXME: here we assume that the old instance type is DT_PLAIN
11063         assert instance.disk_template == constants.DT_PLAIN
11064         disks = [{constants.IDISK_SIZE: d.size,
11065                   constants.IDISK_VG: d.logical_id[0]}
11066                  for d in instance.disks]
11067         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11068         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11069
11070     # hvparams processing
11071     if self.op.hvparams:
11072       hv_type = instance.hypervisor
11073       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11074       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11075       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11076
11077       # local check
11078       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11079       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11080       self.hv_proposed = self.hv_new = hv_new # the new actual values
11081       self.hv_inst = i_hvdict # the new dict (without defaults)
11082     else:
11083       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11084                                               instance.hvparams)
11085       self.hv_new = self.hv_inst = {}
11086
11087     # beparams processing
11088     if self.op.beparams:
11089       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11090                                    use_none=True)
11091       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11092       be_new = cluster.SimpleFillBE(i_bedict)
11093       self.be_proposed = self.be_new = be_new # the new actual values
11094       self.be_inst = i_bedict # the new dict (without defaults)
11095     else:
11096       self.be_new = self.be_inst = {}
11097       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11098     be_old = cluster.FillBE(instance)
11099
11100     # CPU param validation -- checking every time a paramtere is
11101     # changed to cover all cases where either CPU mask or vcpus have
11102     # changed
11103     if (constants.BE_VCPUS in self.be_proposed and
11104         constants.HV_CPU_MASK in self.hv_proposed):
11105       cpu_list = \
11106         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11107       # Verify mask is consistent with number of vCPUs. Can skip this
11108       # test if only 1 entry in the CPU mask, which means same mask
11109       # is applied to all vCPUs.
11110       if (len(cpu_list) > 1 and
11111           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11112         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11113                                    " CPU mask [%s]" %
11114                                    (self.be_proposed[constants.BE_VCPUS],
11115                                     self.hv_proposed[constants.HV_CPU_MASK]),
11116                                    errors.ECODE_INVAL)
11117
11118       # Only perform this test if a new CPU mask is given
11119       if constants.HV_CPU_MASK in self.hv_new:
11120         # Calculate the largest CPU number requested
11121         max_requested_cpu = max(map(max, cpu_list))
11122         # Check that all of the instance's nodes have enough physical CPUs to
11123         # satisfy the requested CPU mask
11124         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11125                                 max_requested_cpu + 1, instance.hypervisor)
11126
11127     # osparams processing
11128     if self.op.osparams:
11129       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11130       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11131       self.os_inst = i_osdict # the new dict (without defaults)
11132     else:
11133       self.os_inst = {}
11134
11135     self.warn = []
11136
11137     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11138         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11139       mem_check_list = [pnode]
11140       if be_new[constants.BE_AUTO_BALANCE]:
11141         # either we changed auto_balance to yes or it was from before
11142         mem_check_list.extend(instance.secondary_nodes)
11143       instance_info = self.rpc.call_instance_info(pnode, instance.name,
11144                                                   instance.hypervisor)
11145       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11146                                          instance.hypervisor)
11147       pninfo = nodeinfo[pnode]
11148       msg = pninfo.fail_msg
11149       if msg:
11150         # Assume the primary node is unreachable and go ahead
11151         self.warn.append("Can't get info from primary node %s: %s" %
11152                          (pnode, msg))
11153       elif not isinstance(pninfo.payload.get("memory_free", None), int):
11154         self.warn.append("Node data from primary node %s doesn't contain"
11155                          " free memory information" % pnode)
11156       elif instance_info.fail_msg:
11157         self.warn.append("Can't get instance runtime information: %s" %
11158                         instance_info.fail_msg)
11159       else:
11160         if instance_info.payload:
11161           current_mem = int(instance_info.payload["memory"])
11162         else:
11163           # Assume instance not running
11164           # (there is a slight race condition here, but it's not very probable,
11165           # and we have no other way to check)
11166           current_mem = 0
11167         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11168                     pninfo.payload["memory_free"])
11169         if miss_mem > 0:
11170           raise errors.OpPrereqError("This change will prevent the instance"
11171                                      " from starting, due to %d MB of memory"
11172                                      " missing on its primary node" % miss_mem,
11173                                      errors.ECODE_NORES)
11174
11175       if be_new[constants.BE_AUTO_BALANCE]:
11176         for node, nres in nodeinfo.items():
11177           if node not in instance.secondary_nodes:
11178             continue
11179           nres.Raise("Can't get info from secondary node %s" % node,
11180                      prereq=True, ecode=errors.ECODE_STATE)
11181           if not isinstance(nres.payload.get("memory_free", None), int):
11182             raise errors.OpPrereqError("Secondary node %s didn't return free"
11183                                        " memory information" % node,
11184                                        errors.ECODE_STATE)
11185           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11186             raise errors.OpPrereqError("This change will prevent the instance"
11187                                        " from failover to its secondary node"
11188                                        " %s, due to not enough memory" % node,
11189                                        errors.ECODE_STATE)
11190
11191     # NIC processing
11192     self.nic_pnew = {}
11193     self.nic_pinst = {}
11194     for nic_op, nic_dict in self.op.nics:
11195       if nic_op == constants.DDM_REMOVE:
11196         if not instance.nics:
11197           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11198                                      errors.ECODE_INVAL)
11199         continue
11200       if nic_op != constants.DDM_ADD:
11201         # an existing nic
11202         if not instance.nics:
11203           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11204                                      " no NICs" % nic_op,
11205                                      errors.ECODE_INVAL)
11206         if nic_op < 0 or nic_op >= len(instance.nics):
11207           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11208                                      " are 0 to %d" %
11209                                      (nic_op, len(instance.nics) - 1),
11210                                      errors.ECODE_INVAL)
11211         old_nic_params = instance.nics[nic_op].nicparams
11212         old_nic_ip = instance.nics[nic_op].ip
11213       else:
11214         old_nic_params = {}
11215         old_nic_ip = None
11216
11217       update_params_dict = dict([(key, nic_dict[key])
11218                                  for key in constants.NICS_PARAMETERS
11219                                  if key in nic_dict])
11220
11221       if "bridge" in nic_dict:
11222         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11223
11224       new_nic_params = _GetUpdatedParams(old_nic_params,
11225                                          update_params_dict)
11226       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11227       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11228       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11229       self.nic_pinst[nic_op] = new_nic_params
11230       self.nic_pnew[nic_op] = new_filled_nic_params
11231       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11232
11233       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11234         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11235         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11236         if msg:
11237           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11238           if self.op.force:
11239             self.warn.append(msg)
11240           else:
11241             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11242       if new_nic_mode == constants.NIC_MODE_ROUTED:
11243         if constants.INIC_IP in nic_dict:
11244           nic_ip = nic_dict[constants.INIC_IP]
11245         else:
11246           nic_ip = old_nic_ip
11247         if nic_ip is None:
11248           raise errors.OpPrereqError("Cannot set the nic ip to None"
11249                                      " on a routed nic", errors.ECODE_INVAL)
11250       if constants.INIC_MAC in nic_dict:
11251         nic_mac = nic_dict[constants.INIC_MAC]
11252         if nic_mac is None:
11253           raise errors.OpPrereqError("Cannot set the nic mac to None",
11254                                      errors.ECODE_INVAL)
11255         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11256           # otherwise generate the mac
11257           nic_dict[constants.INIC_MAC] = \
11258             self.cfg.GenerateMAC(self.proc.GetECId())
11259         else:
11260           # or validate/reserve the current one
11261           try:
11262             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11263           except errors.ReservationError:
11264             raise errors.OpPrereqError("MAC address %s already in use"
11265                                        " in cluster" % nic_mac,
11266                                        errors.ECODE_NOTUNIQUE)
11267
11268     # DISK processing
11269     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11270       raise errors.OpPrereqError("Disk operations not supported for"
11271                                  " diskless instances",
11272                                  errors.ECODE_INVAL)
11273     for disk_op, _ in self.op.disks:
11274       if disk_op == constants.DDM_REMOVE:
11275         if len(instance.disks) == 1:
11276           raise errors.OpPrereqError("Cannot remove the last disk of"
11277                                      " an instance", errors.ECODE_INVAL)
11278         _CheckInstanceDown(self, instance, "cannot remove disks")
11279
11280       if (disk_op == constants.DDM_ADD and
11281           len(instance.disks) >= constants.MAX_DISKS):
11282         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11283                                    " add more" % constants.MAX_DISKS,
11284                                    errors.ECODE_STATE)
11285       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11286         # an existing disk
11287         if disk_op < 0 or disk_op >= len(instance.disks):
11288           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11289                                      " are 0 to %d" %
11290                                      (disk_op, len(instance.disks)),
11291                                      errors.ECODE_INVAL)
11292
11293     return
11294
11295   def _ConvertPlainToDrbd(self, feedback_fn):
11296     """Converts an instance from plain to drbd.
11297
11298     """
11299     feedback_fn("Converting template to drbd")
11300     instance = self.instance
11301     pnode = instance.primary_node
11302     snode = self.op.remote_node
11303
11304     # create a fake disk info for _GenerateDiskTemplate
11305     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11306                   constants.IDISK_VG: d.logical_id[0]}
11307                  for d in instance.disks]
11308     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11309                                       instance.name, pnode, [snode],
11310                                       disk_info, None, None, 0, feedback_fn)
11311     info = _GetInstanceInfoText(instance)
11312     feedback_fn("Creating aditional volumes...")
11313     # first, create the missing data and meta devices
11314     for disk in new_disks:
11315       # unfortunately this is... not too nice
11316       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11317                             info, True)
11318       for child in disk.children:
11319         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11320     # at this stage, all new LVs have been created, we can rename the
11321     # old ones
11322     feedback_fn("Renaming original volumes...")
11323     rename_list = [(o, n.children[0].logical_id)
11324                    for (o, n) in zip(instance.disks, new_disks)]
11325     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11326     result.Raise("Failed to rename original LVs")
11327
11328     feedback_fn("Initializing DRBD devices...")
11329     # all child devices are in place, we can now create the DRBD devices
11330     for disk in new_disks:
11331       for node in [pnode, snode]:
11332         f_create = node == pnode
11333         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11334
11335     # at this point, the instance has been modified
11336     instance.disk_template = constants.DT_DRBD8
11337     instance.disks = new_disks
11338     self.cfg.Update(instance, feedback_fn)
11339
11340     # disks are created, waiting for sync
11341     disk_abort = not _WaitForSync(self, instance,
11342                                   oneshot=not self.op.wait_for_sync)
11343     if disk_abort:
11344       raise errors.OpExecError("There are some degraded disks for"
11345                                " this instance, please cleanup manually")
11346
11347   def _ConvertDrbdToPlain(self, feedback_fn):
11348     """Converts an instance from drbd to plain.
11349
11350     """
11351     instance = self.instance
11352     assert len(instance.secondary_nodes) == 1
11353     pnode = instance.primary_node
11354     snode = instance.secondary_nodes[0]
11355     feedback_fn("Converting template to plain")
11356
11357     old_disks = instance.disks
11358     new_disks = [d.children[0] for d in old_disks]
11359
11360     # copy over size and mode
11361     for parent, child in zip(old_disks, new_disks):
11362       child.size = parent.size
11363       child.mode = parent.mode
11364
11365     # update instance structure
11366     instance.disks = new_disks
11367     instance.disk_template = constants.DT_PLAIN
11368     self.cfg.Update(instance, feedback_fn)
11369
11370     feedback_fn("Removing volumes on the secondary node...")
11371     for disk in old_disks:
11372       self.cfg.SetDiskID(disk, snode)
11373       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11374       if msg:
11375         self.LogWarning("Could not remove block device %s on node %s,"
11376                         " continuing anyway: %s", disk.iv_name, snode, msg)
11377
11378     feedback_fn("Removing unneeded volumes on the primary node...")
11379     for idx, disk in enumerate(old_disks):
11380       meta = disk.children[1]
11381       self.cfg.SetDiskID(meta, pnode)
11382       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11383       if msg:
11384         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11385                         " continuing anyway: %s", idx, pnode, msg)
11386
11387   def Exec(self, feedback_fn):
11388     """Modifies an instance.
11389
11390     All parameters take effect only at the next restart of the instance.
11391
11392     """
11393     # Process here the warnings from CheckPrereq, as we don't have a
11394     # feedback_fn there.
11395     for warn in self.warn:
11396       feedback_fn("WARNING: %s" % warn)
11397
11398     result = []
11399     instance = self.instance
11400     # disk changes
11401     for disk_op, disk_dict in self.op.disks:
11402       if disk_op == constants.DDM_REMOVE:
11403         # remove the last disk
11404         device = instance.disks.pop()
11405         device_idx = len(instance.disks)
11406         for node, disk in device.ComputeNodeTree(instance.primary_node):
11407           self.cfg.SetDiskID(disk, node)
11408           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11409           if msg:
11410             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11411                             " continuing anyway", device_idx, node, msg)
11412         result.append(("disk/%d" % device_idx, "remove"))
11413       elif disk_op == constants.DDM_ADD:
11414         # add a new disk
11415         if instance.disk_template in (constants.DT_FILE,
11416                                         constants.DT_SHARED_FILE):
11417           file_driver, file_path = instance.disks[0].logical_id
11418           file_path = os.path.dirname(file_path)
11419         else:
11420           file_driver = file_path = None
11421         disk_idx_base = len(instance.disks)
11422         new_disk = _GenerateDiskTemplate(self,
11423                                          instance.disk_template,
11424                                          instance.name, instance.primary_node,
11425                                          instance.secondary_nodes,
11426                                          [disk_dict],
11427                                          file_path,
11428                                          file_driver,
11429                                          disk_idx_base, feedback_fn)[0]
11430         instance.disks.append(new_disk)
11431         info = _GetInstanceInfoText(instance)
11432
11433         logging.info("Creating volume %s for instance %s",
11434                      new_disk.iv_name, instance.name)
11435         # Note: this needs to be kept in sync with _CreateDisks
11436         #HARDCODE
11437         for node in instance.all_nodes:
11438           f_create = node == instance.primary_node
11439           try:
11440             _CreateBlockDev(self, node, instance, new_disk,
11441                             f_create, info, f_create)
11442           except errors.OpExecError, err:
11443             self.LogWarning("Failed to create volume %s (%s) on"
11444                             " node %s: %s",
11445                             new_disk.iv_name, new_disk, node, err)
11446         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11447                        (new_disk.size, new_disk.mode)))
11448       else:
11449         # change a given disk
11450         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11451         result.append(("disk.mode/%d" % disk_op,
11452                        disk_dict[constants.IDISK_MODE]))
11453
11454     if self.op.disk_template:
11455       r_shut = _ShutdownInstanceDisks(self, instance)
11456       if not r_shut:
11457         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11458                                  " proceed with disk template conversion")
11459       mode = (instance.disk_template, self.op.disk_template)
11460       try:
11461         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11462       except:
11463         self.cfg.ReleaseDRBDMinors(instance.name)
11464         raise
11465       result.append(("disk_template", self.op.disk_template))
11466
11467     # NIC changes
11468     for nic_op, nic_dict in self.op.nics:
11469       if nic_op == constants.DDM_REMOVE:
11470         # remove the last nic
11471         del instance.nics[-1]
11472         result.append(("nic.%d" % len(instance.nics), "remove"))
11473       elif nic_op == constants.DDM_ADD:
11474         # mac and bridge should be set, by now
11475         mac = nic_dict[constants.INIC_MAC]
11476         ip = nic_dict.get(constants.INIC_IP, None)
11477         nicparams = self.nic_pinst[constants.DDM_ADD]
11478         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11479         instance.nics.append(new_nic)
11480         result.append(("nic.%d" % (len(instance.nics) - 1),
11481                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11482                        (new_nic.mac, new_nic.ip,
11483                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11484                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11485                        )))
11486       else:
11487         for key in (constants.INIC_MAC, constants.INIC_IP):
11488           if key in nic_dict:
11489             setattr(instance.nics[nic_op], key, nic_dict[key])
11490         if nic_op in self.nic_pinst:
11491           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11492         for key, val in nic_dict.iteritems():
11493           result.append(("nic.%s/%d" % (key, nic_op), val))
11494
11495     # hvparams changes
11496     if self.op.hvparams:
11497       instance.hvparams = self.hv_inst
11498       for key, val in self.op.hvparams.iteritems():
11499         result.append(("hv/%s" % key, val))
11500
11501     # beparams changes
11502     if self.op.beparams:
11503       instance.beparams = self.be_inst
11504       for key, val in self.op.beparams.iteritems():
11505         result.append(("be/%s" % key, val))
11506
11507     # OS change
11508     if self.op.os_name:
11509       instance.os = self.op.os_name
11510
11511     # osparams changes
11512     if self.op.osparams:
11513       instance.osparams = self.os_inst
11514       for key, val in self.op.osparams.iteritems():
11515         result.append(("os/%s" % key, val))
11516
11517     self.cfg.Update(instance, feedback_fn)
11518
11519     return result
11520
11521   _DISK_CONVERSIONS = {
11522     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11523     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11524     }
11525
11526
11527 class LUInstanceChangeGroup(LogicalUnit):
11528   HPATH = "instance-change-group"
11529   HTYPE = constants.HTYPE_INSTANCE
11530   REQ_BGL = False
11531
11532   def ExpandNames(self):
11533     self.share_locks = _ShareAll()
11534     self.needed_locks = {
11535       locking.LEVEL_NODEGROUP: [],
11536       locking.LEVEL_NODE: [],
11537       }
11538
11539     self._ExpandAndLockInstance()
11540
11541     if self.op.target_groups:
11542       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11543                                   self.op.target_groups)
11544     else:
11545       self.req_target_uuids = None
11546
11547     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11548
11549   def DeclareLocks(self, level):
11550     if level == locking.LEVEL_NODEGROUP:
11551       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11552
11553       if self.req_target_uuids:
11554         lock_groups = set(self.req_target_uuids)
11555
11556         # Lock all groups used by instance optimistically; this requires going
11557         # via the node before it's locked, requiring verification later on
11558         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11559         lock_groups.update(instance_groups)
11560       else:
11561         # No target groups, need to lock all of them
11562         lock_groups = locking.ALL_SET
11563
11564       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11565
11566     elif level == locking.LEVEL_NODE:
11567       if self.req_target_uuids:
11568         # Lock all nodes used by instances
11569         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11570         self._LockInstancesNodes()
11571
11572         # Lock all nodes in all potential target groups
11573         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11574                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11575         member_nodes = [node_name
11576                         for group in lock_groups
11577                         for node_name in self.cfg.GetNodeGroup(group).members]
11578         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11579       else:
11580         # Lock all nodes as all groups are potential targets
11581         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11582
11583   def CheckPrereq(self):
11584     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11585     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11586     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11587
11588     assert (self.req_target_uuids is None or
11589             owned_groups.issuperset(self.req_target_uuids))
11590     assert owned_instances == set([self.op.instance_name])
11591
11592     # Get instance information
11593     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11594
11595     # Check if node groups for locked instance are still correct
11596     assert owned_nodes.issuperset(self.instance.all_nodes), \
11597       ("Instance %s's nodes changed while we kept the lock" %
11598        self.op.instance_name)
11599
11600     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11601                                            owned_groups)
11602
11603     if self.req_target_uuids:
11604       # User requested specific target groups
11605       self.target_uuids = self.req_target_uuids
11606     else:
11607       # All groups except those used by the instance are potential targets
11608       self.target_uuids = owned_groups - inst_groups
11609
11610     conflicting_groups = self.target_uuids & inst_groups
11611     if conflicting_groups:
11612       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11613                                  " used by the instance '%s'" %
11614                                  (utils.CommaJoin(conflicting_groups),
11615                                   self.op.instance_name),
11616                                  errors.ECODE_INVAL)
11617
11618     if not self.target_uuids:
11619       raise errors.OpPrereqError("There are no possible target groups",
11620                                  errors.ECODE_INVAL)
11621
11622   def BuildHooksEnv(self):
11623     """Build hooks env.
11624
11625     """
11626     assert self.target_uuids
11627
11628     env = {
11629       "TARGET_GROUPS": " ".join(self.target_uuids),
11630       }
11631
11632     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11633
11634     return env
11635
11636   def BuildHooksNodes(self):
11637     """Build hooks nodes.
11638
11639     """
11640     mn = self.cfg.GetMasterNode()
11641     return ([mn], [mn])
11642
11643   def Exec(self, feedback_fn):
11644     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11645
11646     assert instances == [self.op.instance_name], "Instance not locked"
11647
11648     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11649                      instances=instances, target_groups=list(self.target_uuids))
11650
11651     ial.Run(self.op.iallocator)
11652
11653     if not ial.success:
11654       raise errors.OpPrereqError("Can't compute solution for changing group of"
11655                                  " instance '%s' using iallocator '%s': %s" %
11656                                  (self.op.instance_name, self.op.iallocator,
11657                                   ial.info),
11658                                  errors.ECODE_NORES)
11659
11660     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11661
11662     self.LogInfo("Iallocator returned %s job(s) for changing group of"
11663                  " instance '%s'", len(jobs), self.op.instance_name)
11664
11665     return ResultWithJobs(jobs)
11666
11667
11668 class LUBackupQuery(NoHooksLU):
11669   """Query the exports list
11670
11671   """
11672   REQ_BGL = False
11673
11674   def ExpandNames(self):
11675     self.needed_locks = {}
11676     self.share_locks[locking.LEVEL_NODE] = 1
11677     if not self.op.nodes:
11678       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11679     else:
11680       self.needed_locks[locking.LEVEL_NODE] = \
11681         _GetWantedNodes(self, self.op.nodes)
11682
11683   def Exec(self, feedback_fn):
11684     """Compute the list of all the exported system images.
11685
11686     @rtype: dict
11687     @return: a dictionary with the structure node->(export-list)
11688         where export-list is a list of the instances exported on
11689         that node.
11690
11691     """
11692     self.nodes = self.owned_locks(locking.LEVEL_NODE)
11693     rpcresult = self.rpc.call_export_list(self.nodes)
11694     result = {}
11695     for node in rpcresult:
11696       if rpcresult[node].fail_msg:
11697         result[node] = False
11698       else:
11699         result[node] = rpcresult[node].payload
11700
11701     return result
11702
11703
11704 class LUBackupPrepare(NoHooksLU):
11705   """Prepares an instance for an export and returns useful information.
11706
11707   """
11708   REQ_BGL = False
11709
11710   def ExpandNames(self):
11711     self._ExpandAndLockInstance()
11712
11713   def CheckPrereq(self):
11714     """Check prerequisites.
11715
11716     """
11717     instance_name = self.op.instance_name
11718
11719     self.instance = self.cfg.GetInstanceInfo(instance_name)
11720     assert self.instance is not None, \
11721           "Cannot retrieve locked instance %s" % self.op.instance_name
11722     _CheckNodeOnline(self, self.instance.primary_node)
11723
11724     self._cds = _GetClusterDomainSecret()
11725
11726   def Exec(self, feedback_fn):
11727     """Prepares an instance for an export.
11728
11729     """
11730     instance = self.instance
11731
11732     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11733       salt = utils.GenerateSecret(8)
11734
11735       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11736       result = self.rpc.call_x509_cert_create(instance.primary_node,
11737                                               constants.RIE_CERT_VALIDITY)
11738       result.Raise("Can't create X509 key and certificate on %s" % result.node)
11739
11740       (name, cert_pem) = result.payload
11741
11742       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11743                                              cert_pem)
11744
11745       return {
11746         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11747         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11748                           salt),
11749         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11750         }
11751
11752     return None
11753
11754
11755 class LUBackupExport(LogicalUnit):
11756   """Export an instance to an image in the cluster.
11757
11758   """
11759   HPATH = "instance-export"
11760   HTYPE = constants.HTYPE_INSTANCE
11761   REQ_BGL = False
11762
11763   def CheckArguments(self):
11764     """Check the arguments.
11765
11766     """
11767     self.x509_key_name = self.op.x509_key_name
11768     self.dest_x509_ca_pem = self.op.destination_x509_ca
11769
11770     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11771       if not self.x509_key_name:
11772         raise errors.OpPrereqError("Missing X509 key name for encryption",
11773                                    errors.ECODE_INVAL)
11774
11775       if not self.dest_x509_ca_pem:
11776         raise errors.OpPrereqError("Missing destination X509 CA",
11777                                    errors.ECODE_INVAL)
11778
11779   def ExpandNames(self):
11780     self._ExpandAndLockInstance()
11781
11782     # Lock all nodes for local exports
11783     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11784       # FIXME: lock only instance primary and destination node
11785       #
11786       # Sad but true, for now we have do lock all nodes, as we don't know where
11787       # the previous export might be, and in this LU we search for it and
11788       # remove it from its current node. In the future we could fix this by:
11789       #  - making a tasklet to search (share-lock all), then create the
11790       #    new one, then one to remove, after
11791       #  - removing the removal operation altogether
11792       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11793
11794   def DeclareLocks(self, level):
11795     """Last minute lock declaration."""
11796     # All nodes are locked anyway, so nothing to do here.
11797
11798   def BuildHooksEnv(self):
11799     """Build hooks env.
11800
11801     This will run on the master, primary node and target node.
11802
11803     """
11804     env = {
11805       "EXPORT_MODE": self.op.mode,
11806       "EXPORT_NODE": self.op.target_node,
11807       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11808       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11809       # TODO: Generic function for boolean env variables
11810       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11811       }
11812
11813     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11814
11815     return env
11816
11817   def BuildHooksNodes(self):
11818     """Build hooks nodes.
11819
11820     """
11821     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11822
11823     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11824       nl.append(self.op.target_node)
11825
11826     return (nl, nl)
11827
11828   def CheckPrereq(self):
11829     """Check prerequisites.
11830
11831     This checks that the instance and node names are valid.
11832
11833     """
11834     instance_name = self.op.instance_name
11835
11836     self.instance = self.cfg.GetInstanceInfo(instance_name)
11837     assert self.instance is not None, \
11838           "Cannot retrieve locked instance %s" % self.op.instance_name
11839     _CheckNodeOnline(self, self.instance.primary_node)
11840
11841     if (self.op.remove_instance and self.instance.admin_up and
11842         not self.op.shutdown):
11843       raise errors.OpPrereqError("Can not remove instance without shutting it"
11844                                  " down before")
11845
11846     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11847       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11848       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11849       assert self.dst_node is not None
11850
11851       _CheckNodeOnline(self, self.dst_node.name)
11852       _CheckNodeNotDrained(self, self.dst_node.name)
11853
11854       self._cds = None
11855       self.dest_disk_info = None
11856       self.dest_x509_ca = None
11857
11858     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11859       self.dst_node = None
11860
11861       if len(self.op.target_node) != len(self.instance.disks):
11862         raise errors.OpPrereqError(("Received destination information for %s"
11863                                     " disks, but instance %s has %s disks") %
11864                                    (len(self.op.target_node), instance_name,
11865                                     len(self.instance.disks)),
11866                                    errors.ECODE_INVAL)
11867
11868       cds = _GetClusterDomainSecret()
11869
11870       # Check X509 key name
11871       try:
11872         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11873       except (TypeError, ValueError), err:
11874         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11875
11876       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11877         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11878                                    errors.ECODE_INVAL)
11879
11880       # Load and verify CA
11881       try:
11882         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11883       except OpenSSL.crypto.Error, err:
11884         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11885                                    (err, ), errors.ECODE_INVAL)
11886
11887       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11888       if errcode is not None:
11889         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11890                                    (msg, ), errors.ECODE_INVAL)
11891
11892       self.dest_x509_ca = cert
11893
11894       # Verify target information
11895       disk_info = []
11896       for idx, disk_data in enumerate(self.op.target_node):
11897         try:
11898           (host, port, magic) = \
11899             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11900         except errors.GenericError, err:
11901           raise errors.OpPrereqError("Target info for disk %s: %s" %
11902                                      (idx, err), errors.ECODE_INVAL)
11903
11904         disk_info.append((host, port, magic))
11905
11906       assert len(disk_info) == len(self.op.target_node)
11907       self.dest_disk_info = disk_info
11908
11909     else:
11910       raise errors.ProgrammerError("Unhandled export mode %r" %
11911                                    self.op.mode)
11912
11913     # instance disk type verification
11914     # TODO: Implement export support for file-based disks
11915     for disk in self.instance.disks:
11916       if disk.dev_type == constants.LD_FILE:
11917         raise errors.OpPrereqError("Export not supported for instances with"
11918                                    " file-based disks", errors.ECODE_INVAL)
11919
11920   def _CleanupExports(self, feedback_fn):
11921     """Removes exports of current instance from all other nodes.
11922
11923     If an instance in a cluster with nodes A..D was exported to node C, its
11924     exports will be removed from the nodes A, B and D.
11925
11926     """
11927     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11928
11929     nodelist = self.cfg.GetNodeList()
11930     nodelist.remove(self.dst_node.name)
11931
11932     # on one-node clusters nodelist will be empty after the removal
11933     # if we proceed the backup would be removed because OpBackupQuery
11934     # substitutes an empty list with the full cluster node list.
11935     iname = self.instance.name
11936     if nodelist:
11937       feedback_fn("Removing old exports for instance %s" % iname)
11938       exportlist = self.rpc.call_export_list(nodelist)
11939       for node in exportlist:
11940         if exportlist[node].fail_msg:
11941           continue
11942         if iname in exportlist[node].payload:
11943           msg = self.rpc.call_export_remove(node, iname).fail_msg
11944           if msg:
11945             self.LogWarning("Could not remove older export for instance %s"
11946                             " on node %s: %s", iname, node, msg)
11947
11948   def Exec(self, feedback_fn):
11949     """Export an instance to an image in the cluster.
11950
11951     """
11952     assert self.op.mode in constants.EXPORT_MODES
11953
11954     instance = self.instance
11955     src_node = instance.primary_node
11956
11957     if self.op.shutdown:
11958       # shutdown the instance, but not the disks
11959       feedback_fn("Shutting down instance %s" % instance.name)
11960       result = self.rpc.call_instance_shutdown(src_node, instance,
11961                                                self.op.shutdown_timeout)
11962       # TODO: Maybe ignore failures if ignore_remove_failures is set
11963       result.Raise("Could not shutdown instance %s on"
11964                    " node %s" % (instance.name, src_node))
11965
11966     # set the disks ID correctly since call_instance_start needs the
11967     # correct drbd minor to create the symlinks
11968     for disk in instance.disks:
11969       self.cfg.SetDiskID(disk, src_node)
11970
11971     activate_disks = (not instance.admin_up)
11972
11973     if activate_disks:
11974       # Activate the instance disks if we'exporting a stopped instance
11975       feedback_fn("Activating disks for %s" % instance.name)
11976       _StartInstanceDisks(self, instance, None)
11977
11978     try:
11979       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11980                                                      instance)
11981
11982       helper.CreateSnapshots()
11983       try:
11984         if (self.op.shutdown and instance.admin_up and
11985             not self.op.remove_instance):
11986           assert not activate_disks
11987           feedback_fn("Starting instance %s" % instance.name)
11988           result = self.rpc.call_instance_start(src_node,
11989                                                 (instance, None, None), False)
11990           msg = result.fail_msg
11991           if msg:
11992             feedback_fn("Failed to start instance: %s" % msg)
11993             _ShutdownInstanceDisks(self, instance)
11994             raise errors.OpExecError("Could not start instance: %s" % msg)
11995
11996         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11997           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11998         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11999           connect_timeout = constants.RIE_CONNECT_TIMEOUT
12000           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12001
12002           (key_name, _, _) = self.x509_key_name
12003
12004           dest_ca_pem = \
12005             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12006                                             self.dest_x509_ca)
12007
12008           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12009                                                      key_name, dest_ca_pem,
12010                                                      timeouts)
12011       finally:
12012         helper.Cleanup()
12013
12014       # Check for backwards compatibility
12015       assert len(dresults) == len(instance.disks)
12016       assert compat.all(isinstance(i, bool) for i in dresults), \
12017              "Not all results are boolean: %r" % dresults
12018
12019     finally:
12020       if activate_disks:
12021         feedback_fn("Deactivating disks for %s" % instance.name)
12022         _ShutdownInstanceDisks(self, instance)
12023
12024     if not (compat.all(dresults) and fin_resu):
12025       failures = []
12026       if not fin_resu:
12027         failures.append("export finalization")
12028       if not compat.all(dresults):
12029         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12030                                if not dsk)
12031         failures.append("disk export: disk(s) %s" % fdsk)
12032
12033       raise errors.OpExecError("Export failed, errors in %s" %
12034                                utils.CommaJoin(failures))
12035
12036     # At this point, the export was successful, we can cleanup/finish
12037
12038     # Remove instance if requested
12039     if self.op.remove_instance:
12040       feedback_fn("Removing instance %s" % instance.name)
12041       _RemoveInstance(self, feedback_fn, instance,
12042                       self.op.ignore_remove_failures)
12043
12044     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12045       self._CleanupExports(feedback_fn)
12046
12047     return fin_resu, dresults
12048
12049
12050 class LUBackupRemove(NoHooksLU):
12051   """Remove exports related to the named instance.
12052
12053   """
12054   REQ_BGL = False
12055
12056   def ExpandNames(self):
12057     self.needed_locks = {}
12058     # We need all nodes to be locked in order for RemoveExport to work, but we
12059     # don't need to lock the instance itself, as nothing will happen to it (and
12060     # we can remove exports also for a removed instance)
12061     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12062
12063   def Exec(self, feedback_fn):
12064     """Remove any export.
12065
12066     """
12067     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12068     # If the instance was not found we'll try with the name that was passed in.
12069     # This will only work if it was an FQDN, though.
12070     fqdn_warn = False
12071     if not instance_name:
12072       fqdn_warn = True
12073       instance_name = self.op.instance_name
12074
12075     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12076     exportlist = self.rpc.call_export_list(locked_nodes)
12077     found = False
12078     for node in exportlist:
12079       msg = exportlist[node].fail_msg
12080       if msg:
12081         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12082         continue
12083       if instance_name in exportlist[node].payload:
12084         found = True
12085         result = self.rpc.call_export_remove(node, instance_name)
12086         msg = result.fail_msg
12087         if msg:
12088           logging.error("Could not remove export for instance %s"
12089                         " on node %s: %s", instance_name, node, msg)
12090
12091     if fqdn_warn and not found:
12092       feedback_fn("Export not found. If trying to remove an export belonging"
12093                   " to a deleted instance please use its Fully Qualified"
12094                   " Domain Name.")
12095
12096
12097 class LUGroupAdd(LogicalUnit):
12098   """Logical unit for creating node groups.
12099
12100   """
12101   HPATH = "group-add"
12102   HTYPE = constants.HTYPE_GROUP
12103   REQ_BGL = False
12104
12105   def ExpandNames(self):
12106     # We need the new group's UUID here so that we can create and acquire the
12107     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12108     # that it should not check whether the UUID exists in the configuration.
12109     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12110     self.needed_locks = {}
12111     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12112
12113   def CheckPrereq(self):
12114     """Check prerequisites.
12115
12116     This checks that the given group name is not an existing node group
12117     already.
12118
12119     """
12120     try:
12121       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12122     except errors.OpPrereqError:
12123       pass
12124     else:
12125       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12126                                  " node group (UUID: %s)" %
12127                                  (self.op.group_name, existing_uuid),
12128                                  errors.ECODE_EXISTS)
12129
12130     if self.op.ndparams:
12131       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12132
12133   def BuildHooksEnv(self):
12134     """Build hooks env.
12135
12136     """
12137     return {
12138       "GROUP_NAME": self.op.group_name,
12139       }
12140
12141   def BuildHooksNodes(self):
12142     """Build hooks nodes.
12143
12144     """
12145     mn = self.cfg.GetMasterNode()
12146     return ([mn], [mn])
12147
12148   def Exec(self, feedback_fn):
12149     """Add the node group to the cluster.
12150
12151     """
12152     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12153                                   uuid=self.group_uuid,
12154                                   alloc_policy=self.op.alloc_policy,
12155                                   ndparams=self.op.ndparams)
12156
12157     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12158     del self.remove_locks[locking.LEVEL_NODEGROUP]
12159
12160
12161 class LUGroupAssignNodes(NoHooksLU):
12162   """Logical unit for assigning nodes to groups.
12163
12164   """
12165   REQ_BGL = False
12166
12167   def ExpandNames(self):
12168     # These raise errors.OpPrereqError on their own:
12169     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12170     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12171
12172     # We want to lock all the affected nodes and groups. We have readily
12173     # available the list of nodes, and the *destination* group. To gather the
12174     # list of "source" groups, we need to fetch node information later on.
12175     self.needed_locks = {
12176       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12177       locking.LEVEL_NODE: self.op.nodes,
12178       }
12179
12180   def DeclareLocks(self, level):
12181     if level == locking.LEVEL_NODEGROUP:
12182       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12183
12184       # Try to get all affected nodes' groups without having the group or node
12185       # lock yet. Needs verification later in the code flow.
12186       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12187
12188       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12189
12190   def CheckPrereq(self):
12191     """Check prerequisites.
12192
12193     """
12194     assert self.needed_locks[locking.LEVEL_NODEGROUP]
12195     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12196             frozenset(self.op.nodes))
12197
12198     expected_locks = (set([self.group_uuid]) |
12199                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12200     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12201     if actual_locks != expected_locks:
12202       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12203                                " current groups are '%s', used to be '%s'" %
12204                                (utils.CommaJoin(expected_locks),
12205                                 utils.CommaJoin(actual_locks)))
12206
12207     self.node_data = self.cfg.GetAllNodesInfo()
12208     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12209     instance_data = self.cfg.GetAllInstancesInfo()
12210
12211     if self.group is None:
12212       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12213                                (self.op.group_name, self.group_uuid))
12214
12215     (new_splits, previous_splits) = \
12216       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12217                                              for node in self.op.nodes],
12218                                             self.node_data, instance_data)
12219
12220     if new_splits:
12221       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12222
12223       if not self.op.force:
12224         raise errors.OpExecError("The following instances get split by this"
12225                                  " change and --force was not given: %s" %
12226                                  fmt_new_splits)
12227       else:
12228         self.LogWarning("This operation will split the following instances: %s",
12229                         fmt_new_splits)
12230
12231         if previous_splits:
12232           self.LogWarning("In addition, these already-split instances continue"
12233                           " to be split across groups: %s",
12234                           utils.CommaJoin(utils.NiceSort(previous_splits)))
12235
12236   def Exec(self, feedback_fn):
12237     """Assign nodes to a new group.
12238
12239     """
12240     for node in self.op.nodes:
12241       self.node_data[node].group = self.group_uuid
12242
12243     # FIXME: Depends on side-effects of modifying the result of
12244     # C{cfg.GetAllNodesInfo}
12245
12246     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12247
12248   @staticmethod
12249   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12250     """Check for split instances after a node assignment.
12251
12252     This method considers a series of node assignments as an atomic operation,
12253     and returns information about split instances after applying the set of
12254     changes.
12255
12256     In particular, it returns information about newly split instances, and
12257     instances that were already split, and remain so after the change.
12258
12259     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12260     considered.
12261
12262     @type changes: list of (node_name, new_group_uuid) pairs.
12263     @param changes: list of node assignments to consider.
12264     @param node_data: a dict with data for all nodes
12265     @param instance_data: a dict with all instances to consider
12266     @rtype: a two-tuple
12267     @return: a list of instances that were previously okay and result split as a
12268       consequence of this change, and a list of instances that were previously
12269       split and this change does not fix.
12270
12271     """
12272     changed_nodes = dict((node, group) for node, group in changes
12273                          if node_data[node].group != group)
12274
12275     all_split_instances = set()
12276     previously_split_instances = set()
12277
12278     def InstanceNodes(instance):
12279       return [instance.primary_node] + list(instance.secondary_nodes)
12280
12281     for inst in instance_data.values():
12282       if inst.disk_template not in constants.DTS_INT_MIRROR:
12283         continue
12284
12285       instance_nodes = InstanceNodes(inst)
12286
12287       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12288         previously_split_instances.add(inst.name)
12289
12290       if len(set(changed_nodes.get(node, node_data[node].group)
12291                  for node in instance_nodes)) > 1:
12292         all_split_instances.add(inst.name)
12293
12294     return (list(all_split_instances - previously_split_instances),
12295             list(previously_split_instances & all_split_instances))
12296
12297
12298 class _GroupQuery(_QueryBase):
12299   FIELDS = query.GROUP_FIELDS
12300
12301   def ExpandNames(self, lu):
12302     lu.needed_locks = {}
12303
12304     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12305     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12306
12307     if not self.names:
12308       self.wanted = [name_to_uuid[name]
12309                      for name in utils.NiceSort(name_to_uuid.keys())]
12310     else:
12311       # Accept names to be either names or UUIDs.
12312       missing = []
12313       self.wanted = []
12314       all_uuid = frozenset(self._all_groups.keys())
12315
12316       for name in self.names:
12317         if name in all_uuid:
12318           self.wanted.append(name)
12319         elif name in name_to_uuid:
12320           self.wanted.append(name_to_uuid[name])
12321         else:
12322           missing.append(name)
12323
12324       if missing:
12325         raise errors.OpPrereqError("Some groups do not exist: %s" %
12326                                    utils.CommaJoin(missing),
12327                                    errors.ECODE_NOENT)
12328
12329   def DeclareLocks(self, lu, level):
12330     pass
12331
12332   def _GetQueryData(self, lu):
12333     """Computes the list of node groups and their attributes.
12334
12335     """
12336     do_nodes = query.GQ_NODE in self.requested_data
12337     do_instances = query.GQ_INST in self.requested_data
12338
12339     group_to_nodes = None
12340     group_to_instances = None
12341
12342     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12343     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12344     # latter GetAllInstancesInfo() is not enough, for we have to go through
12345     # instance->node. Hence, we will need to process nodes even if we only need
12346     # instance information.
12347     if do_nodes or do_instances:
12348       all_nodes = lu.cfg.GetAllNodesInfo()
12349       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12350       node_to_group = {}
12351
12352       for node in all_nodes.values():
12353         if node.group in group_to_nodes:
12354           group_to_nodes[node.group].append(node.name)
12355           node_to_group[node.name] = node.group
12356
12357       if do_instances:
12358         all_instances = lu.cfg.GetAllInstancesInfo()
12359         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12360
12361         for instance in all_instances.values():
12362           node = instance.primary_node
12363           if node in node_to_group:
12364             group_to_instances[node_to_group[node]].append(instance.name)
12365
12366         if not do_nodes:
12367           # Do not pass on node information if it was not requested.
12368           group_to_nodes = None
12369
12370     return query.GroupQueryData([self._all_groups[uuid]
12371                                  for uuid in self.wanted],
12372                                 group_to_nodes, group_to_instances)
12373
12374
12375 class LUGroupQuery(NoHooksLU):
12376   """Logical unit for querying node groups.
12377
12378   """
12379   REQ_BGL = False
12380
12381   def CheckArguments(self):
12382     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12383                           self.op.output_fields, False)
12384
12385   def ExpandNames(self):
12386     self.gq.ExpandNames(self)
12387
12388   def DeclareLocks(self, level):
12389     self.gq.DeclareLocks(self, level)
12390
12391   def Exec(self, feedback_fn):
12392     return self.gq.OldStyleQuery(self)
12393
12394
12395 class LUGroupSetParams(LogicalUnit):
12396   """Modifies the parameters of a node group.
12397
12398   """
12399   HPATH = "group-modify"
12400   HTYPE = constants.HTYPE_GROUP
12401   REQ_BGL = False
12402
12403   def CheckArguments(self):
12404     all_changes = [
12405       self.op.ndparams,
12406       self.op.alloc_policy,
12407       ]
12408
12409     if all_changes.count(None) == len(all_changes):
12410       raise errors.OpPrereqError("Please pass at least one modification",
12411                                  errors.ECODE_INVAL)
12412
12413   def ExpandNames(self):
12414     # This raises errors.OpPrereqError on its own:
12415     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12416
12417     self.needed_locks = {
12418       locking.LEVEL_NODEGROUP: [self.group_uuid],
12419       }
12420
12421   def CheckPrereq(self):
12422     """Check prerequisites.
12423
12424     """
12425     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12426
12427     if self.group is None:
12428       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12429                                (self.op.group_name, self.group_uuid))
12430
12431     if self.op.ndparams:
12432       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12433       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12434       self.new_ndparams = new_ndparams
12435
12436   def BuildHooksEnv(self):
12437     """Build hooks env.
12438
12439     """
12440     return {
12441       "GROUP_NAME": self.op.group_name,
12442       "NEW_ALLOC_POLICY": self.op.alloc_policy,
12443       }
12444
12445   def BuildHooksNodes(self):
12446     """Build hooks nodes.
12447
12448     """
12449     mn = self.cfg.GetMasterNode()
12450     return ([mn], [mn])
12451
12452   def Exec(self, feedback_fn):
12453     """Modifies the node group.
12454
12455     """
12456     result = []
12457
12458     if self.op.ndparams:
12459       self.group.ndparams = self.new_ndparams
12460       result.append(("ndparams", str(self.group.ndparams)))
12461
12462     if self.op.alloc_policy:
12463       self.group.alloc_policy = self.op.alloc_policy
12464
12465     self.cfg.Update(self.group, feedback_fn)
12466     return result
12467
12468
12469 class LUGroupRemove(LogicalUnit):
12470   HPATH = "group-remove"
12471   HTYPE = constants.HTYPE_GROUP
12472   REQ_BGL = False
12473
12474   def ExpandNames(self):
12475     # This will raises errors.OpPrereqError on its own:
12476     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12477     self.needed_locks = {
12478       locking.LEVEL_NODEGROUP: [self.group_uuid],
12479       }
12480
12481   def CheckPrereq(self):
12482     """Check prerequisites.
12483
12484     This checks that the given group name exists as a node group, that is
12485     empty (i.e., contains no nodes), and that is not the last group of the
12486     cluster.
12487
12488     """
12489     # Verify that the group is empty.
12490     group_nodes = [node.name
12491                    for node in self.cfg.GetAllNodesInfo().values()
12492                    if node.group == self.group_uuid]
12493
12494     if group_nodes:
12495       raise errors.OpPrereqError("Group '%s' not empty, has the following"
12496                                  " nodes: %s" %
12497                                  (self.op.group_name,
12498                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
12499                                  errors.ECODE_STATE)
12500
12501     # Verify the cluster would not be left group-less.
12502     if len(self.cfg.GetNodeGroupList()) == 1:
12503       raise errors.OpPrereqError("Group '%s' is the only group,"
12504                                  " cannot be removed" %
12505                                  self.op.group_name,
12506                                  errors.ECODE_STATE)
12507
12508   def BuildHooksEnv(self):
12509     """Build hooks env.
12510
12511     """
12512     return {
12513       "GROUP_NAME": self.op.group_name,
12514       }
12515
12516   def BuildHooksNodes(self):
12517     """Build hooks nodes.
12518
12519     """
12520     mn = self.cfg.GetMasterNode()
12521     return ([mn], [mn])
12522
12523   def Exec(self, feedback_fn):
12524     """Remove the node group.
12525
12526     """
12527     try:
12528       self.cfg.RemoveNodeGroup(self.group_uuid)
12529     except errors.ConfigurationError:
12530       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12531                                (self.op.group_name, self.group_uuid))
12532
12533     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12534
12535
12536 class LUGroupRename(LogicalUnit):
12537   HPATH = "group-rename"
12538   HTYPE = constants.HTYPE_GROUP
12539   REQ_BGL = False
12540
12541   def ExpandNames(self):
12542     # This raises errors.OpPrereqError on its own:
12543     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12544
12545     self.needed_locks = {
12546       locking.LEVEL_NODEGROUP: [self.group_uuid],
12547       }
12548
12549   def CheckPrereq(self):
12550     """Check prerequisites.
12551
12552     Ensures requested new name is not yet used.
12553
12554     """
12555     try:
12556       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12557     except errors.OpPrereqError:
12558       pass
12559     else:
12560       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12561                                  " node group (UUID: %s)" %
12562                                  (self.op.new_name, new_name_uuid),
12563                                  errors.ECODE_EXISTS)
12564
12565   def BuildHooksEnv(self):
12566     """Build hooks env.
12567
12568     """
12569     return {
12570       "OLD_NAME": self.op.group_name,
12571       "NEW_NAME": self.op.new_name,
12572       }
12573
12574   def BuildHooksNodes(self):
12575     """Build hooks nodes.
12576
12577     """
12578     mn = self.cfg.GetMasterNode()
12579
12580     all_nodes = self.cfg.GetAllNodesInfo()
12581     all_nodes.pop(mn, None)
12582
12583     run_nodes = [mn]
12584     run_nodes.extend(node.name for node in all_nodes.values()
12585                      if node.group == self.group_uuid)
12586
12587     return (run_nodes, run_nodes)
12588
12589   def Exec(self, feedback_fn):
12590     """Rename the node group.
12591
12592     """
12593     group = self.cfg.GetNodeGroup(self.group_uuid)
12594
12595     if group is None:
12596       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12597                                (self.op.group_name, self.group_uuid))
12598
12599     group.name = self.op.new_name
12600     self.cfg.Update(group, feedback_fn)
12601
12602     return self.op.new_name
12603
12604
12605 class LUGroupEvacuate(LogicalUnit):
12606   HPATH = "group-evacuate"
12607   HTYPE = constants.HTYPE_GROUP
12608   REQ_BGL = False
12609
12610   def ExpandNames(self):
12611     # This raises errors.OpPrereqError on its own:
12612     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12613
12614     if self.op.target_groups:
12615       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12616                                   self.op.target_groups)
12617     else:
12618       self.req_target_uuids = []
12619
12620     if self.group_uuid in self.req_target_uuids:
12621       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12622                                  " as a target group (targets are %s)" %
12623                                  (self.group_uuid,
12624                                   utils.CommaJoin(self.req_target_uuids)),
12625                                  errors.ECODE_INVAL)
12626
12627     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12628
12629     self.share_locks = _ShareAll()
12630     self.needed_locks = {
12631       locking.LEVEL_INSTANCE: [],
12632       locking.LEVEL_NODEGROUP: [],
12633       locking.LEVEL_NODE: [],
12634       }
12635
12636   def DeclareLocks(self, level):
12637     if level == locking.LEVEL_INSTANCE:
12638       assert not self.needed_locks[locking.LEVEL_INSTANCE]
12639
12640       # Lock instances optimistically, needs verification once node and group
12641       # locks have been acquired
12642       self.needed_locks[locking.LEVEL_INSTANCE] = \
12643         self.cfg.GetNodeGroupInstances(self.group_uuid)
12644
12645     elif level == locking.LEVEL_NODEGROUP:
12646       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12647
12648       if self.req_target_uuids:
12649         lock_groups = set([self.group_uuid] + self.req_target_uuids)
12650
12651         # Lock all groups used by instances optimistically; this requires going
12652         # via the node before it's locked, requiring verification later on
12653         lock_groups.update(group_uuid
12654                            for instance_name in
12655                              self.owned_locks(locking.LEVEL_INSTANCE)
12656                            for group_uuid in
12657                              self.cfg.GetInstanceNodeGroups(instance_name))
12658       else:
12659         # No target groups, need to lock all of them
12660         lock_groups = locking.ALL_SET
12661
12662       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12663
12664     elif level == locking.LEVEL_NODE:
12665       # This will only lock the nodes in the group to be evacuated which
12666       # contain actual instances
12667       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12668       self._LockInstancesNodes()
12669
12670       # Lock all nodes in group to be evacuated and target groups
12671       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12672       assert self.group_uuid in owned_groups
12673       member_nodes = [node_name
12674                       for group in owned_groups
12675                       for node_name in self.cfg.GetNodeGroup(group).members]
12676       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12677
12678   def CheckPrereq(self):
12679     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12680     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12681     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12682
12683     assert owned_groups.issuperset(self.req_target_uuids)
12684     assert self.group_uuid in owned_groups
12685
12686     # Check if locked instances are still correct
12687     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12688
12689     # Get instance information
12690     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12691
12692     # Check if node groups for locked instances are still correct
12693     for instance_name in owned_instances:
12694       inst = self.instances[instance_name]
12695       assert owned_nodes.issuperset(inst.all_nodes), \
12696         "Instance %s's nodes changed while we kept the lock" % instance_name
12697
12698       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12699                                              owned_groups)
12700
12701       assert self.group_uuid in inst_groups, \
12702         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12703
12704     if self.req_target_uuids:
12705       # User requested specific target groups
12706       self.target_uuids = self.req_target_uuids
12707     else:
12708       # All groups except the one to be evacuated are potential targets
12709       self.target_uuids = [group_uuid for group_uuid in owned_groups
12710                            if group_uuid != self.group_uuid]
12711
12712       if not self.target_uuids:
12713         raise errors.OpPrereqError("There are no possible target groups",
12714                                    errors.ECODE_INVAL)
12715
12716   def BuildHooksEnv(self):
12717     """Build hooks env.
12718
12719     """
12720     return {
12721       "GROUP_NAME": self.op.group_name,
12722       "TARGET_GROUPS": " ".join(self.target_uuids),
12723       }
12724
12725   def BuildHooksNodes(self):
12726     """Build hooks nodes.
12727
12728     """
12729     mn = self.cfg.GetMasterNode()
12730
12731     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12732
12733     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12734
12735     return (run_nodes, run_nodes)
12736
12737   def Exec(self, feedback_fn):
12738     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12739
12740     assert self.group_uuid not in self.target_uuids
12741
12742     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12743                      instances=instances, target_groups=self.target_uuids)
12744
12745     ial.Run(self.op.iallocator)
12746
12747     if not ial.success:
12748       raise errors.OpPrereqError("Can't compute group evacuation using"
12749                                  " iallocator '%s': %s" %
12750                                  (self.op.iallocator, ial.info),
12751                                  errors.ECODE_NORES)
12752
12753     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12754
12755     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12756                  len(jobs), self.op.group_name)
12757
12758     return ResultWithJobs(jobs)
12759
12760
12761 class TagsLU(NoHooksLU): # pylint: disable=W0223
12762   """Generic tags LU.
12763
12764   This is an abstract class which is the parent of all the other tags LUs.
12765
12766   """
12767   def ExpandNames(self):
12768     self.group_uuid = None
12769     self.needed_locks = {}
12770     if self.op.kind == constants.TAG_NODE:
12771       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12772       self.needed_locks[locking.LEVEL_NODE] = self.op.name
12773     elif self.op.kind == constants.TAG_INSTANCE:
12774       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12775       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12776     elif self.op.kind == constants.TAG_NODEGROUP:
12777       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12778
12779     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12780     # not possible to acquire the BGL based on opcode parameters)
12781
12782   def CheckPrereq(self):
12783     """Check prerequisites.
12784
12785     """
12786     if self.op.kind == constants.TAG_CLUSTER:
12787       self.target = self.cfg.GetClusterInfo()
12788     elif self.op.kind == constants.TAG_NODE:
12789       self.target = self.cfg.GetNodeInfo(self.op.name)
12790     elif self.op.kind == constants.TAG_INSTANCE:
12791       self.target = self.cfg.GetInstanceInfo(self.op.name)
12792     elif self.op.kind == constants.TAG_NODEGROUP:
12793       self.target = self.cfg.GetNodeGroup(self.group_uuid)
12794     else:
12795       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12796                                  str(self.op.kind), errors.ECODE_INVAL)
12797
12798
12799 class LUTagsGet(TagsLU):
12800   """Returns the tags of a given object.
12801
12802   """
12803   REQ_BGL = False
12804
12805   def ExpandNames(self):
12806     TagsLU.ExpandNames(self)
12807
12808     # Share locks as this is only a read operation
12809     self.share_locks = _ShareAll()
12810
12811   def Exec(self, feedback_fn):
12812     """Returns the tag list.
12813
12814     """
12815     return list(self.target.GetTags())
12816
12817
12818 class LUTagsSearch(NoHooksLU):
12819   """Searches the tags for a given pattern.
12820
12821   """
12822   REQ_BGL = False
12823
12824   def ExpandNames(self):
12825     self.needed_locks = {}
12826
12827   def CheckPrereq(self):
12828     """Check prerequisites.
12829
12830     This checks the pattern passed for validity by compiling it.
12831
12832     """
12833     try:
12834       self.re = re.compile(self.op.pattern)
12835     except re.error, err:
12836       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12837                                  (self.op.pattern, err), errors.ECODE_INVAL)
12838
12839   def Exec(self, feedback_fn):
12840     """Returns the tag list.
12841
12842     """
12843     cfg = self.cfg
12844     tgts = [("/cluster", cfg.GetClusterInfo())]
12845     ilist = cfg.GetAllInstancesInfo().values()
12846     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12847     nlist = cfg.GetAllNodesInfo().values()
12848     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12849     tgts.extend(("/nodegroup/%s" % n.name, n)
12850                 for n in cfg.GetAllNodeGroupsInfo().values())
12851     results = []
12852     for path, target in tgts:
12853       for tag in target.GetTags():
12854         if self.re.search(tag):
12855           results.append((path, tag))
12856     return results
12857
12858
12859 class LUTagsSet(TagsLU):
12860   """Sets a tag on a given object.
12861
12862   """
12863   REQ_BGL = False
12864
12865   def CheckPrereq(self):
12866     """Check prerequisites.
12867
12868     This checks the type and length of the tag name and value.
12869
12870     """
12871     TagsLU.CheckPrereq(self)
12872     for tag in self.op.tags:
12873       objects.TaggableObject.ValidateTag(tag)
12874
12875   def Exec(self, feedback_fn):
12876     """Sets the tag.
12877
12878     """
12879     try:
12880       for tag in self.op.tags:
12881         self.target.AddTag(tag)
12882     except errors.TagError, err:
12883       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12884     self.cfg.Update(self.target, feedback_fn)
12885
12886
12887 class LUTagsDel(TagsLU):
12888   """Delete a list of tags from a given object.
12889
12890   """
12891   REQ_BGL = False
12892
12893   def CheckPrereq(self):
12894     """Check prerequisites.
12895
12896     This checks that we have the given tag.
12897
12898     """
12899     TagsLU.CheckPrereq(self)
12900     for tag in self.op.tags:
12901       objects.TaggableObject.ValidateTag(tag)
12902     del_tags = frozenset(self.op.tags)
12903     cur_tags = self.target.GetTags()
12904
12905     diff_tags = del_tags - cur_tags
12906     if diff_tags:
12907       diff_names = ("'%s'" % i for i in sorted(diff_tags))
12908       raise errors.OpPrereqError("Tag(s) %s not found" %
12909                                  (utils.CommaJoin(diff_names), ),
12910                                  errors.ECODE_NOENT)
12911
12912   def Exec(self, feedback_fn):
12913     """Remove the tag from the object.
12914
12915     """
12916     for tag in self.op.tags:
12917       self.target.RemoveTag(tag)
12918     self.cfg.Update(self.target, feedback_fn)
12919
12920
12921 class LUTestDelay(NoHooksLU):
12922   """Sleep for a specified amount of time.
12923
12924   This LU sleeps on the master and/or nodes for a specified amount of
12925   time.
12926
12927   """
12928   REQ_BGL = False
12929
12930   def ExpandNames(self):
12931     """Expand names and set required locks.
12932
12933     This expands the node list, if any.
12934
12935     """
12936     self.needed_locks = {}
12937     if self.op.on_nodes:
12938       # _GetWantedNodes can be used here, but is not always appropriate to use
12939       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12940       # more information.
12941       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12942       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12943
12944   def _TestDelay(self):
12945     """Do the actual sleep.
12946
12947     """
12948     if self.op.on_master:
12949       if not utils.TestDelay(self.op.duration):
12950         raise errors.OpExecError("Error during master delay test")
12951     if self.op.on_nodes:
12952       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12953       for node, node_result in result.items():
12954         node_result.Raise("Failure during rpc call to node %s" % node)
12955
12956   def Exec(self, feedback_fn):
12957     """Execute the test delay opcode, with the wanted repetitions.
12958
12959     """
12960     if self.op.repeat == 0:
12961       self._TestDelay()
12962     else:
12963       top_value = self.op.repeat - 1
12964       for i in range(self.op.repeat):
12965         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12966         self._TestDelay()
12967
12968
12969 class LUTestJqueue(NoHooksLU):
12970   """Utility LU to test some aspects of the job queue.
12971
12972   """
12973   REQ_BGL = False
12974
12975   # Must be lower than default timeout for WaitForJobChange to see whether it
12976   # notices changed jobs
12977   _CLIENT_CONNECT_TIMEOUT = 20.0
12978   _CLIENT_CONFIRM_TIMEOUT = 60.0
12979
12980   @classmethod
12981   def _NotifyUsingSocket(cls, cb, errcls):
12982     """Opens a Unix socket and waits for another program to connect.
12983
12984     @type cb: callable
12985     @param cb: Callback to send socket name to client
12986     @type errcls: class
12987     @param errcls: Exception class to use for errors
12988
12989     """
12990     # Using a temporary directory as there's no easy way to create temporary
12991     # sockets without writing a custom loop around tempfile.mktemp and
12992     # socket.bind
12993     tmpdir = tempfile.mkdtemp()
12994     try:
12995       tmpsock = utils.PathJoin(tmpdir, "sock")
12996
12997       logging.debug("Creating temporary socket at %s", tmpsock)
12998       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12999       try:
13000         sock.bind(tmpsock)
13001         sock.listen(1)
13002
13003         # Send details to client
13004         cb(tmpsock)
13005
13006         # Wait for client to connect before continuing
13007         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13008         try:
13009           (conn, _) = sock.accept()
13010         except socket.error, err:
13011           raise errcls("Client didn't connect in time (%s)" % err)
13012       finally:
13013         sock.close()
13014     finally:
13015       # Remove as soon as client is connected
13016       shutil.rmtree(tmpdir)
13017
13018     # Wait for client to close
13019     try:
13020       try:
13021         # pylint: disable=E1101
13022         # Instance of '_socketobject' has no ... member
13023         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13024         conn.recv(1)
13025       except socket.error, err:
13026         raise errcls("Client failed to confirm notification (%s)" % err)
13027     finally:
13028       conn.close()
13029
13030   def _SendNotification(self, test, arg, sockname):
13031     """Sends a notification to the client.
13032
13033     @type test: string
13034     @param test: Test name
13035     @param arg: Test argument (depends on test)
13036     @type sockname: string
13037     @param sockname: Socket path
13038
13039     """
13040     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13041
13042   def _Notify(self, prereq, test, arg):
13043     """Notifies the client of a test.
13044
13045     @type prereq: bool
13046     @param prereq: Whether this is a prereq-phase test
13047     @type test: string
13048     @param test: Test name
13049     @param arg: Test argument (depends on test)
13050
13051     """
13052     if prereq:
13053       errcls = errors.OpPrereqError
13054     else:
13055       errcls = errors.OpExecError
13056
13057     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13058                                                   test, arg),
13059                                    errcls)
13060
13061   def CheckArguments(self):
13062     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13063     self.expandnames_calls = 0
13064
13065   def ExpandNames(self):
13066     checkargs_calls = getattr(self, "checkargs_calls", 0)
13067     if checkargs_calls < 1:
13068       raise errors.ProgrammerError("CheckArguments was not called")
13069
13070     self.expandnames_calls += 1
13071
13072     if self.op.notify_waitlock:
13073       self._Notify(True, constants.JQT_EXPANDNAMES, None)
13074
13075     self.LogInfo("Expanding names")
13076
13077     # Get lock on master node (just to get a lock, not for a particular reason)
13078     self.needed_locks = {
13079       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13080       }
13081
13082   def Exec(self, feedback_fn):
13083     if self.expandnames_calls < 1:
13084       raise errors.ProgrammerError("ExpandNames was not called")
13085
13086     if self.op.notify_exec:
13087       self._Notify(False, constants.JQT_EXEC, None)
13088
13089     self.LogInfo("Executing")
13090
13091     if self.op.log_messages:
13092       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13093       for idx, msg in enumerate(self.op.log_messages):
13094         self.LogInfo("Sending log message %s", idx + 1)
13095         feedback_fn(constants.JQT_MSGPREFIX + msg)
13096         # Report how many test messages have been sent
13097         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13098
13099     if self.op.fail:
13100       raise errors.OpExecError("Opcode failure was requested")
13101
13102     return True
13103
13104
13105 class IAllocator(object):
13106   """IAllocator framework.
13107
13108   An IAllocator instance has three sets of attributes:
13109     - cfg that is needed to query the cluster
13110     - input data (all members of the _KEYS class attribute are required)
13111     - four buffer attributes (in|out_data|text), that represent the
13112       input (to the external script) in text and data structure format,
13113       and the output from it, again in two formats
13114     - the result variables from the script (success, info, nodes) for
13115       easy usage
13116
13117   """
13118   # pylint: disable=R0902
13119   # lots of instance attributes
13120
13121   def __init__(self, cfg, rpc_runner, mode, **kwargs):
13122     self.cfg = cfg
13123     self.rpc = rpc_runner
13124     # init buffer variables
13125     self.in_text = self.out_text = self.in_data = self.out_data = None
13126     # init all input fields so that pylint is happy
13127     self.mode = mode
13128     self.memory = self.disks = self.disk_template = None
13129     self.os = self.tags = self.nics = self.vcpus = None
13130     self.hypervisor = None
13131     self.relocate_from = None
13132     self.name = None
13133     self.instances = None
13134     self.evac_mode = None
13135     self.target_groups = []
13136     # computed fields
13137     self.required_nodes = None
13138     # init result fields
13139     self.success = self.info = self.result = None
13140
13141     try:
13142       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13143     except KeyError:
13144       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13145                                    " IAllocator" % self.mode)
13146
13147     keyset = [n for (n, _) in keydata]
13148
13149     for key in kwargs:
13150       if key not in keyset:
13151         raise errors.ProgrammerError("Invalid input parameter '%s' to"
13152                                      " IAllocator" % key)
13153       setattr(self, key, kwargs[key])
13154
13155     for key in keyset:
13156       if key not in kwargs:
13157         raise errors.ProgrammerError("Missing input parameter '%s' to"
13158                                      " IAllocator" % key)
13159     self._BuildInputData(compat.partial(fn, self), keydata)
13160
13161   def _ComputeClusterData(self):
13162     """Compute the generic allocator input data.
13163
13164     This is the data that is independent of the actual operation.
13165
13166     """
13167     cfg = self.cfg
13168     cluster_info = cfg.GetClusterInfo()
13169     # cluster data
13170     data = {
13171       "version": constants.IALLOCATOR_VERSION,
13172       "cluster_name": cfg.GetClusterName(),
13173       "cluster_tags": list(cluster_info.GetTags()),
13174       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13175       # we don't have job IDs
13176       }
13177     ninfo = cfg.GetAllNodesInfo()
13178     iinfo = cfg.GetAllInstancesInfo().values()
13179     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13180
13181     # node data
13182     node_list = [n.name for n in ninfo.values() if n.vm_capable]
13183
13184     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13185       hypervisor_name = self.hypervisor
13186     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13187       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13188     else:
13189       hypervisor_name = cluster_info.enabled_hypervisors[0]
13190
13191     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13192                                         hypervisor_name)
13193     node_iinfo = \
13194       self.rpc.call_all_instances_info(node_list,
13195                                        cluster_info.enabled_hypervisors)
13196
13197     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13198
13199     config_ndata = self._ComputeBasicNodeData(ninfo)
13200     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13201                                                  i_list, config_ndata)
13202     assert len(data["nodes"]) == len(ninfo), \
13203         "Incomplete node data computed"
13204
13205     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13206
13207     self.in_data = data
13208
13209   @staticmethod
13210   def _ComputeNodeGroupData(cfg):
13211     """Compute node groups data.
13212
13213     """
13214     ng = dict((guuid, {
13215       "name": gdata.name,
13216       "alloc_policy": gdata.alloc_policy,
13217       })
13218       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13219
13220     return ng
13221
13222   @staticmethod
13223   def _ComputeBasicNodeData(node_cfg):
13224     """Compute global node data.
13225
13226     @rtype: dict
13227     @returns: a dict of name: (node dict, node config)
13228
13229     """
13230     # fill in static (config-based) values
13231     node_results = dict((ninfo.name, {
13232       "tags": list(ninfo.GetTags()),
13233       "primary_ip": ninfo.primary_ip,
13234       "secondary_ip": ninfo.secondary_ip,
13235       "offline": ninfo.offline,
13236       "drained": ninfo.drained,
13237       "master_candidate": ninfo.master_candidate,
13238       "group": ninfo.group,
13239       "master_capable": ninfo.master_capable,
13240       "vm_capable": ninfo.vm_capable,
13241       })
13242       for ninfo in node_cfg.values())
13243
13244     return node_results
13245
13246   @staticmethod
13247   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13248                               node_results):
13249     """Compute global node data.
13250
13251     @param node_results: the basic node structures as filled from the config
13252
13253     """
13254     # make a copy of the current dict
13255     node_results = dict(node_results)
13256     for nname, nresult in node_data.items():
13257       assert nname in node_results, "Missing basic data for node %s" % nname
13258       ninfo = node_cfg[nname]
13259
13260       if not (ninfo.offline or ninfo.drained):
13261         nresult.Raise("Can't get data for node %s" % nname)
13262         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13263                                 nname)
13264         remote_info = nresult.payload
13265
13266         for attr in ["memory_total", "memory_free", "memory_dom0",
13267                      "vg_size", "vg_free", "cpu_total"]:
13268           if attr not in remote_info:
13269             raise errors.OpExecError("Node '%s' didn't return attribute"
13270                                      " '%s'" % (nname, attr))
13271           if not isinstance(remote_info[attr], int):
13272             raise errors.OpExecError("Node '%s' returned invalid value"
13273                                      " for '%s': %s" %
13274                                      (nname, attr, remote_info[attr]))
13275         # compute memory used by primary instances
13276         i_p_mem = i_p_up_mem = 0
13277         for iinfo, beinfo in i_list:
13278           if iinfo.primary_node == nname:
13279             i_p_mem += beinfo[constants.BE_MEMORY]
13280             if iinfo.name not in node_iinfo[nname].payload:
13281               i_used_mem = 0
13282             else:
13283               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13284             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13285             remote_info["memory_free"] -= max(0, i_mem_diff)
13286
13287             if iinfo.admin_up:
13288               i_p_up_mem += beinfo[constants.BE_MEMORY]
13289
13290         # compute memory used by instances
13291         pnr_dyn = {
13292           "total_memory": remote_info["memory_total"],
13293           "reserved_memory": remote_info["memory_dom0"],
13294           "free_memory": remote_info["memory_free"],
13295           "total_disk": remote_info["vg_size"],
13296           "free_disk": remote_info["vg_free"],
13297           "total_cpus": remote_info["cpu_total"],
13298           "i_pri_memory": i_p_mem,
13299           "i_pri_up_memory": i_p_up_mem,
13300           }
13301         pnr_dyn.update(node_results[nname])
13302         node_results[nname] = pnr_dyn
13303
13304     return node_results
13305
13306   @staticmethod
13307   def _ComputeInstanceData(cluster_info, i_list):
13308     """Compute global instance data.
13309
13310     """
13311     instance_data = {}
13312     for iinfo, beinfo in i_list:
13313       nic_data = []
13314       for nic in iinfo.nics:
13315         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13316         nic_dict = {
13317           "mac": nic.mac,
13318           "ip": nic.ip,
13319           "mode": filled_params[constants.NIC_MODE],
13320           "link": filled_params[constants.NIC_LINK],
13321           }
13322         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13323           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13324         nic_data.append(nic_dict)
13325       pir = {
13326         "tags": list(iinfo.GetTags()),
13327         "admin_up": iinfo.admin_up,
13328         "vcpus": beinfo[constants.BE_VCPUS],
13329         "memory": beinfo[constants.BE_MEMORY],
13330         "os": iinfo.os,
13331         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13332         "nics": nic_data,
13333         "disks": [{constants.IDISK_SIZE: dsk.size,
13334                    constants.IDISK_MODE: dsk.mode}
13335                   for dsk in iinfo.disks],
13336         "disk_template": iinfo.disk_template,
13337         "hypervisor": iinfo.hypervisor,
13338         }
13339       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13340                                                  pir["disks"])
13341       instance_data[iinfo.name] = pir
13342
13343     return instance_data
13344
13345   def _AddNewInstance(self):
13346     """Add new instance data to allocator structure.
13347
13348     This in combination with _AllocatorGetClusterData will create the
13349     correct structure needed as input for the allocator.
13350
13351     The checks for the completeness of the opcode must have already been
13352     done.
13353
13354     """
13355     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13356
13357     if self.disk_template in constants.DTS_INT_MIRROR:
13358       self.required_nodes = 2
13359     else:
13360       self.required_nodes = 1
13361
13362     request = {
13363       "name": self.name,
13364       "disk_template": self.disk_template,
13365       "tags": self.tags,
13366       "os": self.os,
13367       "vcpus": self.vcpus,
13368       "memory": self.memory,
13369       "disks": self.disks,
13370       "disk_space_total": disk_space,
13371       "nics": self.nics,
13372       "required_nodes": self.required_nodes,
13373       "hypervisor": self.hypervisor,
13374       }
13375
13376     return request
13377
13378   def _AddRelocateInstance(self):
13379     """Add relocate instance data to allocator structure.
13380
13381     This in combination with _IAllocatorGetClusterData will create the
13382     correct structure needed as input for the allocator.
13383
13384     The checks for the completeness of the opcode must have already been
13385     done.
13386
13387     """
13388     instance = self.cfg.GetInstanceInfo(self.name)
13389     if instance is None:
13390       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13391                                    " IAllocator" % self.name)
13392
13393     if instance.disk_template not in constants.DTS_MIRRORED:
13394       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13395                                  errors.ECODE_INVAL)
13396
13397     if instance.disk_template in constants.DTS_INT_MIRROR and \
13398         len(instance.secondary_nodes) != 1:
13399       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13400                                  errors.ECODE_STATE)
13401
13402     self.required_nodes = 1
13403     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13404     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13405
13406     request = {
13407       "name": self.name,
13408       "disk_space_total": disk_space,
13409       "required_nodes": self.required_nodes,
13410       "relocate_from": self.relocate_from,
13411       }
13412     return request
13413
13414   def _AddNodeEvacuate(self):
13415     """Get data for node-evacuate requests.
13416
13417     """
13418     return {
13419       "instances": self.instances,
13420       "evac_mode": self.evac_mode,
13421       }
13422
13423   def _AddChangeGroup(self):
13424     """Get data for node-evacuate requests.
13425
13426     """
13427     return {
13428       "instances": self.instances,
13429       "target_groups": self.target_groups,
13430       }
13431
13432   def _BuildInputData(self, fn, keydata):
13433     """Build input data structures.
13434
13435     """
13436     self._ComputeClusterData()
13437
13438     request = fn()
13439     request["type"] = self.mode
13440     for keyname, keytype in keydata:
13441       if keyname not in request:
13442         raise errors.ProgrammerError("Request parameter %s is missing" %
13443                                      keyname)
13444       val = request[keyname]
13445       if not keytype(val):
13446         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13447                                      " validation, value %s, expected"
13448                                      " type %s" % (keyname, val, keytype))
13449     self.in_data["request"] = request
13450
13451     self.in_text = serializer.Dump(self.in_data)
13452
13453   _STRING_LIST = ht.TListOf(ht.TString)
13454   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13455      # pylint: disable=E1101
13456      # Class '...' has no 'OP_ID' member
13457      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13458                           opcodes.OpInstanceMigrate.OP_ID,
13459                           opcodes.OpInstanceReplaceDisks.OP_ID])
13460      })))
13461
13462   _NEVAC_MOVED = \
13463     ht.TListOf(ht.TAnd(ht.TIsLength(3),
13464                        ht.TItems([ht.TNonEmptyString,
13465                                   ht.TNonEmptyString,
13466                                   ht.TListOf(ht.TNonEmptyString),
13467                                  ])))
13468   _NEVAC_FAILED = \
13469     ht.TListOf(ht.TAnd(ht.TIsLength(2),
13470                        ht.TItems([ht.TNonEmptyString,
13471                                   ht.TMaybeString,
13472                                  ])))
13473   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13474                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13475
13476   _MODE_DATA = {
13477     constants.IALLOCATOR_MODE_ALLOC:
13478       (_AddNewInstance,
13479        [
13480         ("name", ht.TString),
13481         ("memory", ht.TInt),
13482         ("disks", ht.TListOf(ht.TDict)),
13483         ("disk_template", ht.TString),
13484         ("os", ht.TString),
13485         ("tags", _STRING_LIST),
13486         ("nics", ht.TListOf(ht.TDict)),
13487         ("vcpus", ht.TInt),
13488         ("hypervisor", ht.TString),
13489         ], ht.TList),
13490     constants.IALLOCATOR_MODE_RELOC:
13491       (_AddRelocateInstance,
13492        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13493        ht.TList),
13494      constants.IALLOCATOR_MODE_NODE_EVAC:
13495       (_AddNodeEvacuate, [
13496         ("instances", _STRING_LIST),
13497         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13498         ], _NEVAC_RESULT),
13499      constants.IALLOCATOR_MODE_CHG_GROUP:
13500       (_AddChangeGroup, [
13501         ("instances", _STRING_LIST),
13502         ("target_groups", _STRING_LIST),
13503         ], _NEVAC_RESULT),
13504     }
13505
13506   def Run(self, name, validate=True, call_fn=None):
13507     """Run an instance allocator and return the results.
13508
13509     """
13510     if call_fn is None:
13511       call_fn = self.rpc.call_iallocator_runner
13512
13513     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13514     result.Raise("Failure while running the iallocator script")
13515
13516     self.out_text = result.payload
13517     if validate:
13518       self._ValidateResult()
13519
13520   def _ValidateResult(self):
13521     """Process the allocator results.
13522
13523     This will process and if successful save the result in
13524     self.out_data and the other parameters.
13525
13526     """
13527     try:
13528       rdict = serializer.Load(self.out_text)
13529     except Exception, err:
13530       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13531
13532     if not isinstance(rdict, dict):
13533       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13534
13535     # TODO: remove backwards compatiblity in later versions
13536     if "nodes" in rdict and "result" not in rdict:
13537       rdict["result"] = rdict["nodes"]
13538       del rdict["nodes"]
13539
13540     for key in "success", "info", "result":
13541       if key not in rdict:
13542         raise errors.OpExecError("Can't parse iallocator results:"
13543                                  " missing key '%s'" % key)
13544       setattr(self, key, rdict[key])
13545
13546     if not self._result_check(self.result):
13547       raise errors.OpExecError("Iallocator returned invalid result,"
13548                                " expected %s, got %s" %
13549                                (self._result_check, self.result),
13550                                errors.ECODE_INVAL)
13551
13552     if self.mode == constants.IALLOCATOR_MODE_RELOC:
13553       assert self.relocate_from is not None
13554       assert self.required_nodes == 1
13555
13556       node2group = dict((name, ndata["group"])
13557                         for (name, ndata) in self.in_data["nodes"].items())
13558
13559       fn = compat.partial(self._NodesToGroups, node2group,
13560                           self.in_data["nodegroups"])
13561
13562       instance = self.cfg.GetInstanceInfo(self.name)
13563       request_groups = fn(self.relocate_from + [instance.primary_node])
13564       result_groups = fn(rdict["result"] + [instance.primary_node])
13565
13566       if self.success and not set(result_groups).issubset(request_groups):
13567         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13568                                  " differ from original groups (%s)" %
13569                                  (utils.CommaJoin(result_groups),
13570                                   utils.CommaJoin(request_groups)))
13571
13572     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13573       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13574
13575     self.out_data = rdict
13576
13577   @staticmethod
13578   def _NodesToGroups(node2group, groups, nodes):
13579     """Returns a list of unique group names for a list of nodes.
13580
13581     @type node2group: dict
13582     @param node2group: Map from node name to group UUID
13583     @type groups: dict
13584     @param groups: Group information
13585     @type nodes: list
13586     @param nodes: Node names
13587
13588     """
13589     result = set()
13590
13591     for node in nodes:
13592       try:
13593         group_uuid = node2group[node]
13594       except KeyError:
13595         # Ignore unknown node
13596         pass
13597       else:
13598         try:
13599           group = groups[group_uuid]
13600         except KeyError:
13601           # Can't find group, let's use UUID
13602           group_name = group_uuid
13603         else:
13604           group_name = group["name"]
13605
13606         result.add(group_name)
13607
13608     return sorted(result)
13609
13610
13611 class LUTestAllocator(NoHooksLU):
13612   """Run allocator tests.
13613
13614   This LU runs the allocator tests
13615
13616   """
13617   def CheckPrereq(self):
13618     """Check prerequisites.
13619
13620     This checks the opcode parameters depending on the director and mode test.
13621
13622     """
13623     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13624       for attr in ["memory", "disks", "disk_template",
13625                    "os", "tags", "nics", "vcpus"]:
13626         if not hasattr(self.op, attr):
13627           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13628                                      attr, errors.ECODE_INVAL)
13629       iname = self.cfg.ExpandInstanceName(self.op.name)
13630       if iname is not None:
13631         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13632                                    iname, errors.ECODE_EXISTS)
13633       if not isinstance(self.op.nics, list):
13634         raise errors.OpPrereqError("Invalid parameter 'nics'",
13635                                    errors.ECODE_INVAL)
13636       if not isinstance(self.op.disks, list):
13637         raise errors.OpPrereqError("Invalid parameter 'disks'",
13638                                    errors.ECODE_INVAL)
13639       for row in self.op.disks:
13640         if (not isinstance(row, dict) or
13641             constants.IDISK_SIZE not in row or
13642             not isinstance(row[constants.IDISK_SIZE], int) or
13643             constants.IDISK_MODE not in row or
13644             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13645           raise errors.OpPrereqError("Invalid contents of the 'disks'"
13646                                      " parameter", errors.ECODE_INVAL)
13647       if self.op.hypervisor is None:
13648         self.op.hypervisor = self.cfg.GetHypervisorType()
13649     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13650       fname = _ExpandInstanceName(self.cfg, self.op.name)
13651       self.op.name = fname
13652       self.relocate_from = \
13653           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13654     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13655                           constants.IALLOCATOR_MODE_NODE_EVAC):
13656       if not self.op.instances:
13657         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13658       self.op.instances = _GetWantedInstances(self, self.op.instances)
13659     else:
13660       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13661                                  self.op.mode, errors.ECODE_INVAL)
13662
13663     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13664       if self.op.allocator is None:
13665         raise errors.OpPrereqError("Missing allocator name",
13666                                    errors.ECODE_INVAL)
13667     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13668       raise errors.OpPrereqError("Wrong allocator test '%s'" %
13669                                  self.op.direction, errors.ECODE_INVAL)
13670
13671   def Exec(self, feedback_fn):
13672     """Run the allocator test.
13673
13674     """
13675     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13676       ial = IAllocator(self.cfg, self.rpc,
13677                        mode=self.op.mode,
13678                        name=self.op.name,
13679                        memory=self.op.memory,
13680                        disks=self.op.disks,
13681                        disk_template=self.op.disk_template,
13682                        os=self.op.os,
13683                        tags=self.op.tags,
13684                        nics=self.op.nics,
13685                        vcpus=self.op.vcpus,
13686                        hypervisor=self.op.hypervisor,
13687                        )
13688     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13689       ial = IAllocator(self.cfg, self.rpc,
13690                        mode=self.op.mode,
13691                        name=self.op.name,
13692                        relocate_from=list(self.relocate_from),
13693                        )
13694     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13695       ial = IAllocator(self.cfg, self.rpc,
13696                        mode=self.op.mode,
13697                        instances=self.op.instances,
13698                        target_groups=self.op.target_groups)
13699     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13700       ial = IAllocator(self.cfg, self.rpc,
13701                        mode=self.op.mode,
13702                        instances=self.op.instances,
13703                        evac_mode=self.op.evac_mode)
13704     else:
13705       raise errors.ProgrammerError("Uncatched mode %s in"
13706                                    " LUTestAllocator.Exec", self.op.mode)
13707
13708     if self.op.direction == constants.IALLOCATOR_DIR_IN:
13709       result = ial.in_text
13710     else:
13711       ial.Run(self.op.allocator, validate=False)
13712       result = ial.out_text
13713     return result
13714
13715
13716 #: Query type implementations
13717 _QUERY_IMPL = {
13718   constants.QR_INSTANCE: _InstanceQuery,
13719   constants.QR_NODE: _NodeQuery,
13720   constants.QR_GROUP: _GroupQuery,
13721   constants.QR_OS: _OsQuery,
13722   }
13723
13724 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13725
13726
13727 def _GetQueryImplementation(name):
13728   """Returns the implemtnation for a query type.
13729
13730   @param name: Query type, must be one of L{constants.QR_VIA_OP}
13731
13732   """
13733   try:
13734     return _QUERY_IMPL[name]
13735   except KeyError:
13736     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13737                                errors.ECODE_INVAL)