4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
72 """Data container for LU results with jobs.
74 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76 contained in the C{jobs} attribute and include the job IDs in the opcode
80 def __init__(self, jobs, **kwargs):
81 """Initializes this class.
83 Additional return values can be specified as keyword arguments.
85 @type jobs: list of lists of L{opcode.OpCode}
86 @param jobs: A list of lists of opcode objects
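Illustrative usage from an LU's Exec method (a sketch, not taken from
this module's actual LUs; the keyword argument name is hypothetical)::

  # submit one follow-up job containing a single opcode and also
  # return an extra value to the caller
  return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                        custom_result="done")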
93 class LogicalUnit(object):
94 """Logical Unit base class.
96 Subclasses must follow these rules:
97 - implement ExpandNames
98 - implement CheckPrereq (except when tasklets are used)
99 - implement Exec (except when tasklets are used)
100 - implement BuildHooksEnv
101 - implement BuildHooksNodes
102 - redefine HPATH and HTYPE
103 - optionally redefine their run requirements:
104 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
106 Note that all commands require root permissions.
108 @ivar dry_run_result: the value (if any) that will be returned to the caller
109 in dry-run mode (signalled by opcode dry_run parameter)
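A minimal, purely illustrative subclass sketch (the class name and hook
path are hypothetical)::

  class LUExampleNoop(LogicalUnit):
    HPATH = "example-noop"
    HTYPE = constants.HTYPE_CLUSTER

    def ExpandNames(self):
      self.needed_locks = {}

    def BuildHooksEnv(self):
      return {"OP_TARGET": self.cfg.GetClusterName()}

    def BuildHooksNodes(self):
      return ([], [self.cfg.GetMasterNode()])

    def CheckPrereq(self):
      pass

    def Exec(self, feedback_fn):
      feedback_fn("Nothing to do")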
116 def __init__(self, processor, op, context, rpc_runner):
117 """Constructor for LogicalUnit.
119 This needs to be overridden in derived classes in order to check op
123 self.proc = processor
125 self.cfg = context.cfg
126 self.glm = context.glm
128 self.owned_locks = context.glm.list_owned
129 self.context = context
130 self.rpc = rpc_runner
131 # Dicts used to declare locking needs to mcpu
132 self.needed_locks = None
133 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
135 self.remove_locks = {}
136 # Used to force good behavior when calling helper functions
137 self.recalculate_locks = {}
139 self.Log = processor.Log # pylint: disable=C0103
140 self.LogWarning = processor.LogWarning # pylint: disable=C0103
141 self.LogInfo = processor.LogInfo # pylint: disable=C0103
142 self.LogStep = processor.LogStep # pylint: disable=C0103
143 # support for dry-run
144 self.dry_run_result = None
145 # support for generic debug attribute
146 if (not hasattr(self.op, "debug_level") or
147 not isinstance(self.op.debug_level, int)):
148 self.op.debug_level = 0
153 # Validate opcode parameters and set defaults
154 self.op.Validate(True)
156 self.CheckArguments()
158 def CheckArguments(self):
159 """Check syntactic validity for the opcode arguments.
161 This method is for doing a simple syntactic check and ensuring the
162 validity of opcode parameters, without any cluster-related
163 checks. While the same can be accomplished in ExpandNames and/or
164 CheckPrereq, doing these separately is better because:
166 - ExpandNames is left as a purely lock-related function
167 - CheckPrereq is run after we have acquired locks (and possible
170 The function is allowed to change the self.op attribute so that
171 later methods need no longer worry about missing parameters.
176 def ExpandNames(self):
177 """Expand names for this LU.
179 This method is called before starting to execute the opcode, and it should
180 update all the parameters of the opcode to their canonical form (e.g. a
181 short node name must be fully expanded after this method has successfully
182 completed). This way locking, hooks, logging, etc. can work correctly.
184 LUs which implement this method must also populate the self.needed_locks
185 member, as a dict with lock levels as keys, and a list of needed lock names
188 - use an empty dict if you don't need any lock
189 - if you don't need any lock at a particular level omit that level
190 - don't put anything for the BGL level
191 - if you want all locks at a level use locking.ALL_SET as a value
193 If you need to share locks (rather than acquire them exclusively) at one
194 level you can modify self.share_locks, setting a true value (usually 1) for
195 that level. By default locks are not shared.
197 This function can also define a list of tasklets, which then will be
198 executed in order instead of the usual LU-level CheckPrereq and Exec
199 functions, if those are not defined by the LU.
203 # Acquire all nodes and one instance
204 self.needed_locks = {
205 locking.LEVEL_NODE: locking.ALL_SET,
206 locking.LEVEL_INSTANCE: ['instance1.example.com'],
208 # Acquire just two nodes
209 self.needed_locks = {
210 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
213 self.needed_locks = {} # No, you can't leave it to the default value None
216 # The implementation of this method is mandatory only if the new LU is
217 # concurrent, so that old LUs don't need to be changed all at the same
220 self.needed_locks = {} # Exclusive LUs don't need locks.
222 raise NotImplementedError
224 def DeclareLocks(self, level):
225 """Declare LU locking needs for a level
227 While most LUs can just declare their locking needs at ExpandNames time,
228 sometimes there's the need to calculate some locks after having acquired
229 the ones before. This function is called just before acquiring locks at a
230 particular level, but after acquiring the ones at lower levels, and permits
231 such calculations. It can be used to modify self.needed_locks, and by
232 default it does nothing.
234 This function is only called if you have something already set in
235 self.needed_locks for the level.
237 @param level: Locking level which is going to be locked
238 @type level: member of ganeti.locking.LEVELS
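A typical override looks similar to (illustrative only)::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)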
242 def CheckPrereq(self):
243 """Check prerequisites for this LU.
245 This method should check that the prerequisites for the execution
246 of this LU are fulfilled. It can do internode communication, but
247 it should be idempotent - no cluster or system changes are
250 The method should raise errors.OpPrereqError in case something is
251 not fulfilled. Its return value is ignored.
253 This method should also update all the parameters of the opcode to
254 their canonical form if it hasn't been done by ExpandNames before.
257 if self.tasklets is not None:
258 for (idx, tl) in enumerate(self.tasklets):
259 logging.debug("Checking prerequisites for tasklet %s/%s",
260 idx + 1, len(self.tasklets))
265 def Exec(self, feedback_fn):
268 This method should implement the actual work. It should raise
269 errors.OpExecError for failures that are somewhat dealt with in
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
278 raise NotImplementedError
280 def BuildHooksEnv(self):
281 """Build hooks environment for this LU.
284 @return: Dictionary containing the environment that will be used for
285 running the hooks for this LU. The keys of the dict must not be prefixed
286 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287 will extend the environment with additional variables. If no environment
288 should be defined, an empty dictionary should be returned (not C{None}).
289 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
293 raise NotImplementedError
295 def BuildHooksNodes(self):
296 """Build list of nodes to run LU's hooks.
298 @rtype: tuple; (list, list)
299 @return: Tuple containing a list of node names on which the hook
300 should run before the execution and a list of node names on which the
301 hook should run after the execution. No nodes should be returned as an
302 empty list (and not None).
303 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
307 raise NotImplementedError
309 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310 """Notify the LU about the results of its hooks.
312 This method is called every time a hooks phase is executed, and notifies
313 the Logical Unit about the hooks' result. The LU can then use it to alter
314 its result based on the hooks. By default the method does nothing and the
315 previous result is passed back unchanged but any LU can define it if it
316 wants to use the local cluster hook-scripts somehow.
318 @param phase: one of L{constants.HOOKS_PHASE_POST} or
319 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320 @param hook_results: the results of the multi-node hooks rpc call
321 @param feedback_fn: function used to send feedback back to the caller
322 @param lu_result: the previous Exec result this LU had, or None
324 @return: the new Exec result, based on the previous result
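An illustrative override that only reacts to post-phase hooks (a sketch,
not used by any LU in this excerpt)::

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    if phase == constants.HOOKS_PHASE_POST:
      feedback_fn("Post hooks have run")
    return lu_result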
328 # API must be kept, thus we ignore the 'unused argument' and 'could
329 # be a function' pylint warnings
330 # pylint: disable=W0613,R0201
333 def _ExpandAndLockInstance(self):
334 """Helper function to expand and lock an instance.
336 Many LUs that work on an instance take its name in self.op.instance_name
337 and need to expand it and then declare the expanded name for locking. This
338 function does it, and then updates self.op.instance_name to the expanded
339 name. It also initializes needed_locks as a dict, if this hasn't been done
343 if self.needed_locks is None:
344 self.needed_locks = {}
346 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347 "_ExpandAndLockInstance called with instance-level locks set"
348 self.op.instance_name = _ExpandInstanceName(self.cfg,
349 self.op.instance_name)
350 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
352 def _LockInstancesNodes(self, primary_only=False):
353 """Helper function to declare instances' nodes for locking.
355 This function should be called after locking one or more instances to lock
356 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
357 with all primary or secondary nodes for instances already locked and
358 present in self.needed_locks[locking.LEVEL_INSTANCE].
360 It should be called from DeclareLocks, and for safety only works if
361 self.recalculate_locks[locking.LEVEL_NODE] is set.
363 In the future it may grow parameters to just lock some instance's nodes, or
364 to just lock primaries or secondary nodes, if needed.
366 It should be called from DeclareLocks in a way similar to::
368 if level == locking.LEVEL_NODE:
369 self._LockInstancesNodes()
371 @type primary_only: boolean
372 @param primary_only: only lock primary nodes of locked instances
375 assert locking.LEVEL_NODE in self.recalculate_locks, \
376 "_LockInstancesNodes helper function called with no nodes to recalculate"
378 # TODO: check if we've really been called with the instance locks held
380 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
381 # future we might want to have different behaviors depending on the value
382 # of self.recalculate_locks[locking.LEVEL_NODE]
384 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
385 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
386 wanted_nodes.append(instance.primary_node)
388 wanted_nodes.extend(instance.secondary_nodes)
390 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
391 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
392 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
393 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
395 del self.recalculate_locks[locking.LEVEL_NODE]
398 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
399 """Simple LU which runs no hooks.
401 This LU is intended as a parent for other LogicalUnits which will
402 run no hooks, in order to reduce duplicate code.
408 def BuildHooksEnv(self):
409 """Empty BuildHooksEnv for NoHooksLu.
411 This just raises an error.
414 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
416 def BuildHooksNodes(self):
417 """Empty BuildHooksNodes for NoHooksLU.
420 raise AssertionError("BuildHooksNodes called for NoHooksLU")
424 """Tasklet base class.
426 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
427 they can mix legacy code with tasklets. Locking needs to be done in the LU,
428 tasklets know nothing about locks.
430 Subclasses must follow these rules:
431 - Implement CheckPrereq
435 def __init__(self, lu):
442 def CheckPrereq(self):
443 """Check prerequisites for this tasklets.
445 This method should check whether the prerequisites for the execution of
446 this tasklet are fulfilled. It can do internode communication, but it
447 should be idempotent - no cluster or system changes are allowed.
449 The method should raise errors.OpPrereqError in case something is not
450 fulfilled. Its return value is ignored.
452 This method should also update all parameters to their canonical form if it
453 hasn't been done before.
458 def Exec(self, feedback_fn):
459 """Execute the tasklet.
461 This method should implement the actual work. It should raise
462 errors.OpExecError for failures that are somewhat dealt with in code, or
466 raise NotImplementedError
470 """Base for query utility classes.
473 #: Attribute holding field definitions
476 def __init__(self, qfilter, fields, use_locking):
477 """Initializes this class.
480 self.use_locking = use_locking
482 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
484 self.requested_data = self.query.RequestedData()
485 self.names = self.query.RequestedNames()
487 # Sort only if no names were requested
488 self.sort_by_name = not self.names
490 self.do_locking = None
493 def _GetNames(self, lu, all_names, lock_level):
494 """Helper function to determine names asked for in the query.
498 names = lu.owned_locks(lock_level)
502 if self.wanted == locking.ALL_SET:
503 assert not self.names
504 # caller didn't specify names, so ordering is not important
505 return utils.NiceSort(names)
507 # caller specified names and we must keep the same order
509 assert not self.do_locking or lu.glm.is_owned(lock_level)
511 missing = set(self.wanted).difference(names)
513 raise errors.OpExecError("Some items were removed before retrieving"
514 " their data: %s" % missing)
516 # Return expanded names
519 def ExpandNames(self, lu):
520 """Expand names for this query.
522 See L{LogicalUnit.ExpandNames}.
525 raise NotImplementedError()
527 def DeclareLocks(self, lu, level):
528 """Declare locks for this query.
530 See L{LogicalUnit.DeclareLocks}.
533 raise NotImplementedError()
535 def _GetQueryData(self, lu):
536 """Collects all data for this query.
538 @return: Query data object
541 raise NotImplementedError()
543 def NewStyleQuery(self, lu):
544 """Collect data and execute query.
547 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
548 sort_by_name=self.sort_by_name)
550 def OldStyleQuery(self, lu):
551 """Collect data and execute query.
554 return self.query.OldStyleQuery(self._GetQueryData(lu),
555 sort_by_name=self.sort_by_name)
559 """Returns a dict declaring all lock levels shared.
562 return dict.fromkeys(locking.LEVELS, 1)
565 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
566 """Checks if the owned node groups are still correct for an instance.
568 @type cfg: L{config.ConfigWriter}
569 @param cfg: The cluster configuration
570 @type instance_name: string
571 @param instance_name: Instance name
572 @type owned_groups: set or frozenset
573 @param owned_groups: List of currently owned node groups
576 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
578 if not owned_groups.issuperset(inst_groups):
579 raise errors.OpPrereqError("Instance %s's node groups changed since"
580 " locks were acquired, current groups are"
581 " are '%s', owning groups '%s'; retry the"
584 utils.CommaJoin(inst_groups),
585 utils.CommaJoin(owned_groups)),
591 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
592 """Checks if the instances in a node group are still correct.
594 @type cfg: L{config.ConfigWriter}
595 @param cfg: The cluster configuration
596 @type group_uuid: string
597 @param group_uuid: Node group UUID
598 @type owned_instances: set or frozenset
599 @param owned_instances: List of currently owned instances
602 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
603 if owned_instances != wanted_instances:
604 raise errors.OpPrereqError("Instances in node group '%s' changed since"
605 " locks were acquired, wanted '%s', have '%s';"
606 " retry the operation" %
608 utils.CommaJoin(wanted_instances),
609 utils.CommaJoin(owned_instances)),
612 return wanted_instances
615 def _SupportsOob(cfg, node):
616 """Tells if node supports OOB.
618 @type cfg: L{config.ConfigWriter}
619 @param cfg: The cluster configuration
620 @type node: L{objects.Node}
621 @param node: The node
622 @return: The OOB script if supported or an empty string otherwise
625 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
628 def _GetWantedNodes(lu, nodes):
629 """Returns list of checked and expanded node names.
631 @type lu: L{LogicalUnit}
632 @param lu: the logical unit on whose behalf we execute
634 @param nodes: list of node names or None for all nodes
636 @return: the list of nodes, sorted
637 @raise errors.ProgrammerError: if the nodes parameter is wrong type
641 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
643 return utils.NiceSort(lu.cfg.GetNodeList())
646 def _GetWantedInstances(lu, instances):
647 """Returns list of checked and expanded instance names.
649 @type lu: L{LogicalUnit}
650 @param lu: the logical unit on whose behalf we execute
651 @type instances: list
652 @param instances: list of instance names or None for all instances
654 @return: the list of instances, sorted
655 @raise errors.OpPrereqError: if the instances parameter is wrong type
656 @raise errors.OpPrereqError: if any of the passed instances is not found
660 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
662 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
666 def _GetUpdatedParams(old_params, update_dict,
667 use_default=True, use_none=False):
668 """Return the new version of a parameter dictionary.
670 @type old_params: dict
671 @param old_params: old parameters
672 @type update_dict: dict
673 @param update_dict: dict containing new parameter values, or
674 constants.VALUE_DEFAULT to reset the parameter to its default
676 @type use_default: boolean
677 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
678 values as 'to be deleted' values
679 @type use_none: boolean
680 @param use_none: whether to recognise C{None} values as 'to be
683 @return: the new parameter dictionary
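Example (illustrative keys and values)::

  old = {"mem": 128, "vcpus": 1}
  new = _GetUpdatedParams(old, {"mem": constants.VALUE_DEFAULT,
                                "nics": 2})
  # new == {"vcpus": 1, "nics": 2}: "mem" reverts to its default
  # (i.e. is removed from the dict), "nics" is added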
686 params_copy = copy.deepcopy(old_params)
687 for key, val in update_dict.iteritems():
688 if ((use_default and val == constants.VALUE_DEFAULT) or
689 (use_none and val is None)):
695 params_copy[key] = val
699 def _ReleaseLocks(lu, level, names=None, keep=None):
700 """Releases locks owned by an LU.
702 @type lu: L{LogicalUnit}
703 @param level: Lock level
704 @type names: list or None
705 @param names: Names of locks to release
706 @type keep: list or None
707 @param keep: Names of locks to retain
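Example (illustrative; C{node_name} is a hypothetical variable)::

  # release all node locks except the one for node_name
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[node_name])

  # release every node lock owned by the LU
  _ReleaseLocks(lu, locking.LEVEL_NODE)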
710 assert not (keep is not None and names is not None), \
711 "Only one of the 'names' and the 'keep' parameters can be given"
713 if names is not None:
714 should_release = names.__contains__
716 should_release = lambda name: name not in keep
718 should_release = None
724 # Determine which locks to release
725 for name in lu.owned_locks(level):
726 if should_release(name):
731 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
733 # Release just some locks
734 lu.glm.release(level, names=release)
736 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
739 lu.glm.release(level)
741 assert not lu.glm.is_owned(level), "No locks should be owned"
744 def _MapInstanceDisksToNodes(instances):
745 """Creates a map from (node, volume) to instance name.
747 @type instances: list of L{objects.Instance}
748 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
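The result looks similar to (illustrative names and volumes)::

  {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
   ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}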
751 return dict(((node, vol), inst.name)
752 for inst in instances
753 for (node, vols) in inst.MapLVsByNode().items()
757 def _RunPostHook(lu, node_name):
758 """Runs the post-hook for an opcode on a single node.
761 hm = lu.proc.BuildHooksManager(lu)
763 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
765 # pylint: disable=W0702
766 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
769 def _CheckOutputFields(static, dynamic, selected):
770 """Checks whether all selected fields are valid.
772 @type static: L{utils.FieldSet}
773 @param static: static fields set
774 @type dynamic: L{utils.FieldSet}
775 @param dynamic: dynamic fields set
782 delta = f.NonMatching(selected)
784 raise errors.OpPrereqError("Unknown output fields selected: %s"
785 % ",".join(delta), errors.ECODE_INVAL)
788 def _CheckGlobalHvParams(params):
789 """Validates that given hypervisor params are not global ones.
791 This will ensure that instances don't get customised versions of
795 used_globals = constants.HVC_GLOBALS.intersection(params)
797 msg = ("The following hypervisor parameters are global and cannot"
798 " be customized at instance level, please modify them at"
799 " cluster level: %s" % utils.CommaJoin(used_globals))
800 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
803 def _CheckNodeOnline(lu, node, msg=None):
804 """Ensure that a given node is online.
806 @param lu: the LU on behalf of which we make the check
807 @param node: the node to check
808 @param msg: if passed, should be a message to replace the default one
809 @raise errors.OpPrereqError: if the node is offline
813 msg = "Can't use offline node"
814 if lu.cfg.GetNodeInfo(node).offline:
815 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
818 def _CheckNodeNotDrained(lu, node):
819 """Ensure that a given node is not drained.
821 @param lu: the LU on behalf of which we make the check
822 @param node: the node to check
823 @raise errors.OpPrereqError: if the node is drained
826 if lu.cfg.GetNodeInfo(node).drained:
827 raise errors.OpPrereqError("Can't use drained node %s" % node,
831 def _CheckNodeVmCapable(lu, node):
832 """Ensure that a given node is vm capable.
834 @param lu: the LU on behalf of which we make the check
835 @param node: the node to check
836 @raise errors.OpPrereqError: if the node is not vm capable
839 if not lu.cfg.GetNodeInfo(node).vm_capable:
840 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
844 def _CheckNodeHasOS(lu, node, os_name, force_variant):
845 """Ensure that a node supports a given OS.
847 @param lu: the LU on behalf of which we make the check
848 @param node: the node to check
849 @param os_name: the OS to query about
850 @param force_variant: whether to ignore variant errors
851 @raise errors.OpPrereqError: if the node does not support the OS
854 result = lu.rpc.call_os_get(node, os_name)
855 result.Raise("OS '%s' not in supported OS list for node %s" %
857 prereq=True, ecode=errors.ECODE_INVAL)
858 if not force_variant:
859 _CheckOSVariant(result.payload, os_name)
862 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
863 """Ensure that a node has the given secondary ip.
865 @type lu: L{LogicalUnit}
866 @param lu: the LU on behalf of which we make the check
868 @param node: the node to check
869 @type secondary_ip: string
870 @param secondary_ip: the ip to check
871 @type prereq: boolean
872 @param prereq: whether to throw a prerequisite or an execute error
873 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
874 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
877 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
878 result.Raise("Failure checking secondary ip on node %s" % node,
879 prereq=prereq, ecode=errors.ECODE_ENVIRON)
880 if not result.payload:
881 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
882 " please fix and re-run this command" % secondary_ip)
884 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
886 raise errors.OpExecError(msg)
889 def _GetClusterDomainSecret():
890 """Reads the cluster domain secret.
893 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
897 def _CheckInstanceDown(lu, instance, reason):
898 """Ensure that an instance is not running."""
899 if instance.admin_up:
900 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
901 (instance.name, reason), errors.ECODE_STATE)
903 pnode = instance.primary_node
904 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
905 ins_l.Raise("Can't contact node %s for instance information" % pnode,
906 prereq=True, ecode=errors.ECODE_ENVIRON)
908 if instance.name in ins_l.payload:
909 raise errors.OpPrereqError("Instance %s is running, %s" %
910 (instance.name, reason), errors.ECODE_STATE)
913 def _ExpandItemName(fn, name, kind):
914 """Expand an item name.
916 @param fn: the function to use for expansion
917 @param name: requested item name
918 @param kind: text description ('Node' or 'Instance')
919 @return: the resolved (full) name
920 @raise errors.OpPrereqError: if the item is not found
924 if full_name is None:
925 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
930 def _ExpandNodeName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for nodes."""
932 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
935 def _ExpandInstanceName(cfg, name):
936 """Wrapper over L{_ExpandItemName} for instance."""
937 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
940 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
941 memory, vcpus, nics, disk_template, disks,
942 bep, hvp, hypervisor_name, tags):
943 """Builds instance related env variables for hooks
945 This builds the hook environment from individual variables.
948 @param name: the name of the instance
949 @type primary_node: string
950 @param primary_node: the name of the instance's primary node
951 @type secondary_nodes: list
952 @param secondary_nodes: list of secondary nodes as strings
953 @type os_type: string
954 @param os_type: the name of the instance's OS
955 @type status: boolean
956 @param status: the should_run status of the instance
958 @param memory: the memory size of the instance
960 @param vcpus: the count of VCPUs the instance has
962 @param nics: list of tuples (ip, mac, mode, link) representing
963 the NICs the instance has
964 @type disk_template: string
965 @param disk_template: the disk template of the instance
967 @param disks: the list of (size, mode) pairs
969 @param bep: the backend parameters for the instance
971 @param hvp: the hypervisor parameters for the instance
972 @type hypervisor_name: string
973 @param hypervisor_name: the hypervisor for the instance
975 @param tags: list of instance tags as strings
977 @return: the hook environment for this instance
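For a single-NIC, single-disk instance the resulting dictionary contains,
among others, entries like (illustrative values)::

  "INSTANCE_NAME": "inst1.example.com"
  "INSTANCE_PRIMARY": "node1.example.com"
  "INSTANCE_NIC_COUNT": 1
  "INSTANCE_NIC0_MODE": "bridged"
  "INSTANCE_DISK_COUNT": 1
  "INSTANCE_DISK0_SIZE": 1024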
986 "INSTANCE_NAME": name,
987 "INSTANCE_PRIMARY": primary_node,
988 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
989 "INSTANCE_OS_TYPE": os_type,
990 "INSTANCE_STATUS": str_status,
991 "INSTANCE_MEMORY": memory,
992 "INSTANCE_VCPUS": vcpus,
993 "INSTANCE_DISK_TEMPLATE": disk_template,
994 "INSTANCE_HYPERVISOR": hypervisor_name,
998 nic_count = len(nics)
999 for idx, (ip, mac, mode, link) in enumerate(nics):
1002 env["INSTANCE_NIC%d_IP" % idx] = ip
1003 env["INSTANCE_NIC%d_MAC" % idx] = mac
1004 env["INSTANCE_NIC%d_MODE" % idx] = mode
1005 env["INSTANCE_NIC%d_LINK" % idx] = link
1006 if mode == constants.NIC_MODE_BRIDGED:
1007 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1011 env["INSTANCE_NIC_COUNT"] = nic_count
1014 disk_count = len(disks)
1015 for idx, (size, mode) in enumerate(disks):
1016 env["INSTANCE_DISK%d_SIZE" % idx] = size
1017 env["INSTANCE_DISK%d_MODE" % idx] = mode
1021 env["INSTANCE_DISK_COUNT"] = disk_count
1026 env["INSTANCE_TAGS"] = " ".join(tags)
1028 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1029 for key, value in source.items():
1030 env["INSTANCE_%s_%s" % (kind, key)] = value
1035 def _NICListToTuple(lu, nics):
1036 """Build a list of nic information tuples.
1038 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1039 value in LUInstanceQueryData.
1041 @type lu: L{LogicalUnit}
1042 @param lu: the logical unit on whose behalf we execute
1043 @type nics: list of L{objects.NIC}
1044 @param nics: list of nics to convert to hooks tuples
1048 cluster = lu.cfg.GetClusterInfo()
1052 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1053 mode = filled_params[constants.NIC_MODE]
1054 link = filled_params[constants.NIC_LINK]
1055 hooks_nics.append((ip, mac, mode, link))
1059 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1060 """Builds instance related env variables for hooks from an object.
1062 @type lu: L{LogicalUnit}
1063 @param lu: the logical unit on whose behalf we execute
1064 @type instance: L{objects.Instance}
1065 @param instance: the instance for which we should build the
1067 @type override: dict
1068 @param override: dictionary with key/values that will override
1071 @return: the hook environment dictionary
1074 cluster = lu.cfg.GetClusterInfo()
1075 bep = cluster.FillBE(instance)
1076 hvp = cluster.FillHV(instance)
1078 "name": instance.name,
1079 "primary_node": instance.primary_node,
1080 "secondary_nodes": instance.secondary_nodes,
1081 "os_type": instance.os,
1082 "status": instance.admin_up,
1083 "memory": bep[constants.BE_MEMORY],
1084 "vcpus": bep[constants.BE_VCPUS],
1085 "nics": _NICListToTuple(lu, instance.nics),
1086 "disk_template": instance.disk_template,
1087 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1090 "hypervisor_name": instance.hypervisor,
1091 "tags": instance.tags,
1094 args.update(override)
1095 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1098 def _AdjustCandidatePool(lu, exceptions):
1099 """Adjust the candidate pool after node operations.
1102 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1104 lu.LogInfo("Promoted nodes to master candidate role: %s",
1105 utils.CommaJoin(node.name for node in mod_list))
1106 for name in mod_list:
1107 lu.context.ReaddNode(name)
1108 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1110 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1114 def _DecideSelfPromotion(lu, exceptions=None):
1115 """Decide whether I should promote myself as a master candidate.
1118 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1119 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1120 # the new node will increase mc_max with one, so:
1121 mc_should = min(mc_should + 1, cp_size)
1122 return mc_now < mc_should
1125 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1126 """Check that the brigdes needed by a list of nics exist.
1129 cluster = lu.cfg.GetClusterInfo()
1130 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1131 brlist = [params[constants.NIC_LINK] for params in paramslist
1132 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1134 result = lu.rpc.call_bridges_exist(target_node, brlist)
1135 result.Raise("Error checking bridges on destination node '%s'" %
1136 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1139 def _CheckInstanceBridgesExist(lu, instance, node=None):
1140 """Check that the brigdes needed by an instance exist.
1144 node = instance.primary_node
1145 _CheckNicsBridgesExist(lu, instance.nics, node)
1148 def _CheckOSVariant(os_obj, name):
1149 """Check whether an OS name conforms to the os variants specification.
1151 @type os_obj: L{objects.OS}
1152 @param os_obj: OS object to check
1154 @param name: OS name passed by the user, to check for validity
1157 variant = objects.OS.GetVariant(name)
1158 if not os_obj.supported_variants:
1160 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1161 " passed)" % (os_obj.name, variant),
1165 raise errors.OpPrereqError("OS name must include a variant",
1168 if variant not in os_obj.supported_variants:
1169 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1172 def _GetNodeInstancesInner(cfg, fn):
1173 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1176 def _GetNodeInstances(cfg, node_name):
1177 """Returns a list of all primary and secondary instances on a node.
1181 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1184 def _GetNodePrimaryInstances(cfg, node_name):
1185 """Returns primary instances on a node.
1188 return _GetNodeInstancesInner(cfg,
1189 lambda inst: node_name == inst.primary_node)
1192 def _GetNodeSecondaryInstances(cfg, node_name):
1193 """Returns secondary instances on a node.
1196 return _GetNodeInstancesInner(cfg,
1197 lambda inst: node_name in inst.secondary_nodes)
1200 def _GetStorageTypeArgs(cfg, storage_type):
1201 """Returns the arguments for a storage type.
1204 # Special case for file storage
1205 if storage_type == constants.ST_FILE:
1206 # storage.FileStorage wants a list of storage directories
1207 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1215 for dev in instance.disks:
1216 cfg.SetDiskID(dev, node_name)
1218 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1219 result.Raise("Failed to get disk status from node %s" % node_name,
1220 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1222 for idx, bdev_status in enumerate(result.payload):
1223 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1229 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1230 """Check the sanity of iallocator and node arguments and use the
1231 cluster-wide iallocator if appropriate.
1233 Check that at most one of (iallocator, node) is specified. If none is
1234 specified, then the LU's opcode's iallocator slot is filled with the
1235 cluster-wide default iallocator.
1237 @type iallocator_slot: string
1238 @param iallocator_slot: the name of the opcode iallocator slot
1239 @type node_slot: string
1240 @param node_slot: the name of the opcode target node slot
1243 node = getattr(lu.op, node_slot, None)
1244 iallocator = getattr(lu.op, iallocator_slot, None)
1246 if node is not None and iallocator is not None:
1247 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1249 elif node is None and iallocator is None:
1250 default_iallocator = lu.cfg.GetDefaultIAllocator()
1251 if default_iallocator:
1252 setattr(lu.op, iallocator_slot, default_iallocator)
1254 raise errors.OpPrereqError("No iallocator or node given and no"
1255 " cluster-wide default iallocator found;"
1256 " please specify either an iallocator or a"
1257 " node, or set a cluster-wide default"
1261 def _GetDefaultIAllocator(cfg, iallocator):
1262 """Decides on which iallocator to use.
1264 @type cfg: L{config.ConfigWriter}
1265 @param cfg: Cluster configuration object
1266 @type iallocator: string or None
1267 @param iallocator: Iallocator specified in opcode
1269 @return: Iallocator name
1273 # Use default iallocator
1274 iallocator = cfg.GetDefaultIAllocator()
1277 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1278 " opcode nor as a cluster-wide default",
1284 class LUClusterPostInit(LogicalUnit):
1285 """Logical unit for running hooks after cluster initialization.
1288 HPATH = "cluster-init"
1289 HTYPE = constants.HTYPE_CLUSTER
1291 def BuildHooksEnv(self):
1296 "OP_TARGET": self.cfg.GetClusterName(),
1299 def BuildHooksNodes(self):
1300 """Build hooks nodes.
1303 return ([], [self.cfg.GetMasterNode()])
1305 def Exec(self, feedback_fn):
1312 class LUClusterDestroy(LogicalUnit):
1313 """Logical unit for destroying the cluster.
1316 HPATH = "cluster-destroy"
1317 HTYPE = constants.HTYPE_CLUSTER
1319 def BuildHooksEnv(self):
1324 "OP_TARGET": self.cfg.GetClusterName(),
1327 def BuildHooksNodes(self):
1328 """Build hooks nodes.
1333 def CheckPrereq(self):
1334 """Check prerequisites.
1336 This checks whether the cluster is empty.
1338 Any errors are signaled by raising errors.OpPrereqError.
1341 master = self.cfg.GetMasterNode()
1343 nodelist = self.cfg.GetNodeList()
1344 if len(nodelist) != 1 or nodelist[0] != master:
1345 raise errors.OpPrereqError("There are still %d node(s) in"
1346 " this cluster." % (len(nodelist) - 1),
1348 instancelist = self.cfg.GetInstanceList()
1350 raise errors.OpPrereqError("There are still %d instance(s) in"
1351 " this cluster." % len(instancelist),
1354 def Exec(self, feedback_fn):
1355 """Destroys the cluster.
1358 master_params = self.cfg.GetMasterNetworkParameters()
1360 # Run post hooks on master node before it's removed
1361 _RunPostHook(self, master_params.name)
1363 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1365 master_params.netmask,
1366 master_params.netdev,
1367 master_params.ip_family)
1368 result.Raise("Could not disable the master role")
1370 return master_params.name
1373 def _VerifyCertificate(filename):
1374 """Verifies a certificate for L{LUClusterVerifyConfig}.
1376 @type filename: string
1377 @param filename: Path to PEM file
1381 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1382 utils.ReadFile(filename))
1383 except Exception, err: # pylint: disable=W0703
1384 return (LUClusterVerifyConfig.ETYPE_ERROR,
1385 "Failed to load X509 certificate %s: %s" % (filename, err))
1388 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1389 constants.SSL_CERT_EXPIRATION_ERROR)
1392 fnamemsg = "While verifying %s: %s" % (filename, msg)
1397 return (None, fnamemsg)
1398 elif errcode == utils.CERT_WARNING:
1399 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1400 elif errcode == utils.CERT_ERROR:
1401 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1403 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1406 def _GetAllHypervisorParameters(cluster, instances):
1407 """Compute the set of all hypervisor parameters.
1409 @type cluster: L{objects.Cluster}
1410 @param cluster: the cluster object
1411 @type instances: list of L{objects.Instance}
1412 @param instances: additional instances from which to obtain parameters
1413 @rtype: list of (origin, hypervisor, parameters)
1414 @return: a list with all parameters found, indicating the hypervisor they
1415 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1420 for hv_name in cluster.enabled_hypervisors:
1421 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1423 for os_name, os_hvp in cluster.os_hvp.items():
1424 for hv_name, hv_params in os_hvp.items():
1426 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1427 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1429 # TODO: collapse identical parameter values in a single one
1430 for instance in instances:
1431 if instance.hvparams:
1432 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1433 cluster.FillHV(instance)))
1438 class _VerifyErrors(object):
1439 """Mix-in for cluster/group verify LUs.
1441 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1442 self.op and self._feedback_fn to be available.)
1446 ETYPE_FIELD = "code"
1447 ETYPE_ERROR = "ERROR"
1448 ETYPE_WARNING = "WARNING"
1450 def _Error(self, ecode, item, msg, *args, **kwargs):
1451 """Format an error message.
1453 Based on the opcode's error_codes parameter, either format a
1454 parseable error code, or a simpler error string.
1456 This must be called only from Exec and functions called from Exec.
1459 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1460 itype, etxt, _ = ecode
1461 # first complete the msg
1464 # then format the whole message
1465 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1466 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1472 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1473 # and finally report it via the feedback_fn
1474 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1476 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1477 """Log an error message if the passed condition is True.
1481 or self.op.debug_simulate_errors) # pylint: disable=E1101
1483 # If the error code is in the list of ignored errors, demote the error to a
1485 (_, etxt, _) = ecode
1486 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1487 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1490 self._Error(ecode, *args, **kwargs)
1492 # do not mark the operation as failed for WARN cases only
1493 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1494 self.bad = self.bad or cond
1497 class LUClusterVerify(NoHooksLU):
1498 """Submits all jobs necessary to verify the cluster.
1503 def ExpandNames(self):
1504 self.needed_locks = {}
1506 def Exec(self, feedback_fn):
1509 if self.op.group_name:
1510 groups = [self.op.group_name]
1511 depends_fn = lambda: None
1513 groups = self.cfg.GetNodeGroupList()
1515 # Verify global configuration
1517 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1520 # Always depend on global verification
1521 depends_fn = lambda: [(-len(jobs), [])]
1523 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1524 ignore_errors=self.op.ignore_errors,
1525 depends=depends_fn())]
1526 for group in groups)
1528 # Fix up all parameters
1529 for op in itertools.chain(*jobs): # pylint: disable=W0142
1530 op.debug_simulate_errors = self.op.debug_simulate_errors
1531 op.verbose = self.op.verbose
1532 op.error_codes = self.op.error_codes
1534 op.skip_checks = self.op.skip_checks
1535 except AttributeError:
1536 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1538 return ResultWithJobs(jobs)
1541 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1542 """Verifies the cluster config.
1547 def _VerifyHVP(self, hvp_data):
1548 """Verifies locally the syntax of the hypervisor parameters.
1551 for item, hv_name, hv_params in hvp_data:
1552 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1555 hv_class = hypervisor.GetHypervisor(hv_name)
1556 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1557 hv_class.CheckParameterSyntax(hv_params)
1558 except errors.GenericError, err:
1559 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1561 def ExpandNames(self):
1562 # Information can be safely retrieved as the BGL is acquired in exclusive
1564 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1565 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1566 self.all_node_info = self.cfg.GetAllNodesInfo()
1567 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1568 self.needed_locks = {}
1570 def Exec(self, feedback_fn):
1571 """Verify integrity of cluster, performing various test on nodes.
1575 self._feedback_fn = feedback_fn
1577 feedback_fn("* Verifying cluster config")
1579 for msg in self.cfg.VerifyConfig():
1580 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1582 feedback_fn("* Verifying cluster certificate files")
1584 for cert_filename in constants.ALL_CERT_FILES:
1585 (errcode, msg) = _VerifyCertificate(cert_filename)
1586 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1588 feedback_fn("* Verifying hypervisor parameters")
1590 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1591 self.all_inst_info.values()))
1593 feedback_fn("* Verifying all nodes belong to an existing group")
1595 # We do this verification here because, should this bogus circumstance
1596 # occur, it would never be caught by VerifyGroup, which only acts on
1597 # nodes/instances reachable from existing node groups.
1599 dangling_nodes = set(node.name for node in self.all_node_info.values()
1600 if node.group not in self.all_group_info)
1602 dangling_instances = {}
1603 no_node_instances = []
1605 for inst in self.all_inst_info.values():
1606 if inst.primary_node in dangling_nodes:
1607 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1608 elif inst.primary_node not in self.all_node_info:
1609 no_node_instances.append(inst.name)
1614 utils.CommaJoin(dangling_instances.get(node.name,
1616 for node in dangling_nodes]
1618 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1620 "the following nodes (and their instances) belong to a non"
1621 " existing group: %s", utils.CommaJoin(pretty_dangling))
1623 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1625 "the following instances have a non-existing primary-node:"
1626 " %s", utils.CommaJoin(no_node_instances))
1631 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1632 """Verifies the status of a node group.
1635 HPATH = "cluster-verify"
1636 HTYPE = constants.HTYPE_CLUSTER
1639 _HOOKS_INDENT_RE = re.compile("^", re.M)
1641 class NodeImage(object):
1642 """A class representing the logical and physical status of a node.
1645 @ivar name: the node name to which this object refers
1646 @ivar volumes: a structure as returned from
1647 L{ganeti.backend.GetVolumeList} (runtime)
1648 @ivar instances: a list of running instances (runtime)
1649 @ivar pinst: list of configured primary instances (config)
1650 @ivar sinst: list of configured secondary instances (config)
1651 @ivar sbp: dictionary of {primary-node: list of instances} for all
1652 instances for which this node is secondary (config)
1653 @ivar mfree: free memory, as reported by hypervisor (runtime)
1654 @ivar dfree: free disk, as reported by the node (runtime)
1655 @ivar offline: the offline status (config)
1656 @type rpc_fail: boolean
1657 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1658 not whether the individual keys were correct) (runtime)
1659 @type lvm_fail: boolean
1660 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1661 @type hyp_fail: boolean
1662 @ivar hyp_fail: whether the RPC call didn't return the instance list
1663 @type ghost: boolean
1664 @ivar ghost: whether this is a known node or not (config)
1665 @type os_fail: boolean
1666 @ivar os_fail: whether the RPC call didn't return valid OS data
1668 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1669 @type vm_capable: boolean
1670 @ivar vm_capable: whether the node can host instances
1673 def __init__(self, offline=False, name=None, vm_capable=True):
1682 self.offline = offline
1683 self.vm_capable = vm_capable
1684 self.rpc_fail = False
1685 self.lvm_fail = False
1686 self.hyp_fail = False
1688 self.os_fail = False
1691 def ExpandNames(self):
1692 # This raises errors.OpPrereqError on its own:
1693 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1695 # Get instances in node group; this is unsafe and needs verification later
1696 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1698 self.needed_locks = {
1699 locking.LEVEL_INSTANCE: inst_names,
1700 locking.LEVEL_NODEGROUP: [self.group_uuid],
1701 locking.LEVEL_NODE: [],
1704 self.share_locks = _ShareAll()
1706 def DeclareLocks(self, level):
1707 if level == locking.LEVEL_NODE:
1708 # Get members of node group; this is unsafe and needs verification later
1709 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1711 all_inst_info = self.cfg.GetAllInstancesInfo()
1713 # In Exec(), we warn about mirrored instances that have primary and
1714 # secondary living in separate node groups. To fully verify that
1715 # volumes for these instances are healthy, we will need to do an
1716 # extra call to their secondaries. We ensure here those nodes will
1718 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1719 # Important: access only the instances whose lock is owned
1720 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1721 nodes.update(all_inst_info[inst].secondary_nodes)
1723 self.needed_locks[locking.LEVEL_NODE] = nodes
1725 def CheckPrereq(self):
1726 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1727 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1729 group_nodes = set(self.group_info.members)
1730 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1733 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1735 unlocked_instances = \
1736 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1739 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1740 utils.CommaJoin(unlocked_nodes))
1742 if unlocked_instances:
1743 raise errors.OpPrereqError("Missing lock for instances: %s" %
1744 utils.CommaJoin(unlocked_instances))
1746 self.all_node_info = self.cfg.GetAllNodesInfo()
1747 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1749 self.my_node_names = utils.NiceSort(group_nodes)
1750 self.my_inst_names = utils.NiceSort(group_instances)
1752 self.my_node_info = dict((name, self.all_node_info[name])
1753 for name in self.my_node_names)
1755 self.my_inst_info = dict((name, self.all_inst_info[name])
1756 for name in self.my_inst_names)
1758 # We detect here the nodes that will need the extra RPC calls for verifying
1759 # split LV volumes; they should be locked.
1760 extra_lv_nodes = set()
1762 for inst in self.my_inst_info.values():
1763 if inst.disk_template in constants.DTS_INT_MIRROR:
1764 group = self.my_node_info[inst.primary_node].group
1765 for nname in inst.secondary_nodes:
1766 if self.all_node_info[nname].group != group:
1767 extra_lv_nodes.add(nname)
1769 unlocked_lv_nodes = \
1770 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1772 if unlocked_lv_nodes:
1773 raise errors.OpPrereqError("these nodes could be locked: %s" %
1774 utils.CommaJoin(unlocked_lv_nodes))
1775 self.extra_lv_nodes = list(extra_lv_nodes)
1777 def _VerifyNode(self, ninfo, nresult):
1778 """Perform some basic validation on data returned from a node.
1780 - check the result data structure is well formed and has all the
1782 - check ganeti version
1784 @type ninfo: L{objects.Node}
1785 @param ninfo: the node to check
1786 @param nresult: the results from the node
1788 @return: whether overall this call was successful (and we can expect
1789 reasonable values in the response)
1793 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1795 # main result, nresult should be a non-empty dict
1796 test = not nresult or not isinstance(nresult, dict)
1797 _ErrorIf(test, constants.CV_ENODERPC, node,
1798 "unable to verify node: no data returned")
1802 # compares ganeti version
1803 local_version = constants.PROTOCOL_VERSION
1804 remote_version = nresult.get("version", None)
1805 test = not (remote_version and
1806 isinstance(remote_version, (list, tuple)) and
1807 len(remote_version) == 2)
1808 _ErrorIf(test, constants.CV_ENODERPC, node,
1809 "connection to node returned invalid data")
1813 test = local_version != remote_version[0]
1814 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1815 "incompatible protocol versions: master %s,"
1816 " node %s", local_version, remote_version[0])
1820 # node seems compatible, we can actually try to look into its results
1822 # full package version
1823 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1824 constants.CV_ENODEVERSION, node,
1825 "software version mismatch: master %s, node %s",
1826 constants.RELEASE_VERSION, remote_version[1],
1827 code=self.ETYPE_WARNING)
1829 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1830 if ninfo.vm_capable and isinstance(hyp_result, dict):
1831 for hv_name, hv_result in hyp_result.iteritems():
1832 test = hv_result is not None
1833 _ErrorIf(test, constants.CV_ENODEHV, node,
1834 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1836 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1837 if ninfo.vm_capable and isinstance(hvp_result, list):
1838 for item, hv_name, hv_result in hvp_result:
1839 _ErrorIf(True, constants.CV_ENODEHV, node,
1840 "hypervisor %s parameter verify failure (source %s): %s",
1841 hv_name, item, hv_result)
1843 test = nresult.get(constants.NV_NODESETUP,
1844 ["Missing NODESETUP results"])
1845 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1850 def _VerifyNodeTime(self, ninfo, nresult,
1851 nvinfo_starttime, nvinfo_endtime):
1852 """Check the node time.
1854 @type ninfo: L{objects.Node}
1855 @param ninfo: the node to check
1856 @param nresult: the remote results for the node
1857 @param nvinfo_starttime: the start time of the RPC call
1858 @param nvinfo_endtime: the end time of the RPC call
1862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1864 ntime = nresult.get(constants.NV_TIME, None)
1866 ntime_merged = utils.MergeTime(ntime)
1867 except (ValueError, TypeError):
1868 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1871 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1872 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1873 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1874 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1878 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1879 "Node time diverges by at least %s from master node time",
1882 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1883 """Check the node LVM results.
1885 @type ninfo: L{objects.Node}
1886 @param ninfo: the node to check
1887 @param nresult: the remote results for the node
1888 @param vg_name: the configured VG name
1895 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1897 # checks vg existence and size > 20G
1898 vglist = nresult.get(constants.NV_VGLIST, None)
1900 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1902 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1903 constants.MIN_VG_SIZE)
1904 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1907 pvlist = nresult.get(constants.NV_PVLIST, None)
1908 test = pvlist is None
1909 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1911 # check that ':' is not present in PV names, since it's a
1912 # special character for lvcreate (denotes the range of PEs to
1914 for _, pvname, owner_vg in pvlist:
1915 test = ":" in pvname
1916 _ErrorIf(test, constants.CV_ENODELVM, node,
1917 "Invalid character ':' in PV '%s' of VG '%s'",
1920 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1921 """Check the node bridges.
1923 @type ninfo: L{objects.Node}
1924 @param ninfo: the node to check
1925 @param nresult: the remote results for the node
1926 @param bridges: the expected list of bridges
1933 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1935 missing = nresult.get(constants.NV_BRIDGES, None)
1936 test = not isinstance(missing, list)
1937 _ErrorIf(test, constants.CV_ENODENET, node,
1938 "did not return valid bridge information")
1940 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1941 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1943 def _VerifyNodeNetwork(self, ninfo, nresult):
1944 """Check the node network connectivity results.
1946 @type ninfo: L{objects.Node}
1947 @param ninfo: the node to check
1948 @param nresult: the remote results for the node
1952 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1954 test = constants.NV_NODELIST not in nresult
1955 _ErrorIf(test, constants.CV_ENODESSH, node,
1956 "node hasn't returned node ssh connectivity data")
1958 if nresult[constants.NV_NODELIST]:
1959 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1960 _ErrorIf(True, constants.CV_ENODESSH, node,
1961 "ssh communication with node '%s': %s", a_node, a_msg)
1963 test = constants.NV_NODENETTEST not in nresult
1964 _ErrorIf(test, constants.CV_ENODENET, node,
1965 "node hasn't returned node tcp connectivity data")
1967 if nresult[constants.NV_NODENETTEST]:
1968 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1970 _ErrorIf(True, constants.CV_ENODENET, node,
1971 "tcp communication with node '%s': %s",
1972 anode, nresult[constants.NV_NODENETTEST][anode])
1974 test = constants.NV_MASTERIP not in nresult
1975 _ErrorIf(test, constants.CV_ENODENET, node,
1976 "node hasn't returned node master IP reachability data")
1978 if not nresult[constants.NV_MASTERIP]:
1979 if node == self.master_node:
1980 msg = "the master node cannot reach the master IP (not configured?)"
1982 msg = "cannot reach the master IP"
1983 _ErrorIf(True, constants.CV_ENODENET, node, msg)
1985 def _VerifyInstance(self, instance, instanceconfig, node_image,
1987 """Verify an instance.
1989 This function checks to see if the required block devices are
1990 available on the instance's node.
1993 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1994 node_current = instanceconfig.primary_node
1996 node_vol_should = {}
1997 instanceconfig.MapLVsByNode(node_vol_should)
1999 for node in node_vol_should:
2000 n_img = node_image[node]
2001 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2002 # ignore missing volumes on offline or broken nodes
2004 for volume in node_vol_should[node]:
2005 test = volume not in n_img.volumes
2006 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2007 "volume %s missing on node %s", volume, node)
2009 if instanceconfig.admin_up:
2010 pri_img = node_image[node_current]
2011 test = instance not in pri_img.instances and not pri_img.offline
2012 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2013 "instance not running on its primary node %s",
2016 diskdata = [(nname, success, status, idx)
2017 for (nname, disks) in diskstatus.items()
2018 for idx, (success, status) in enumerate(disks)]
2020 for nname, success, bdev_status, idx in diskdata:
2021 # the 'ghost node' construction in Exec() ensures that we have a
2022 # node here
2023 snode = node_image[nname]
2024 bad_snode = snode.ghost or snode.offline
2025 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2026 constants.CV_EINSTANCEFAULTYDISK, instance,
2027 "couldn't retrieve status for disk/%s on %s: %s",
2028 idx, nname, bdev_status)
2029 _ErrorIf((instanceconfig.admin_up and success and
2030 bdev_status.ldisk_status == constants.LDS_FAULTY),
2031 constants.CV_EINSTANCEFAULTYDISK, instance,
2032 "disk/%s on %s is faulty", idx, nname)
2034 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2035 """Verify if there are any unknown volumes in the cluster.
2037 The .os, .swap and backup volumes are ignored. All other volumes are
2038 reported as unknown.
2040 @type reserved: L{ganeti.utils.FieldSet}
2041 @param reserved: a FieldSet of reserved volume names
2044 for node, n_img in node_image.items():
2045 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2046 # skip non-healthy nodes
2048 for volume in n_img.volumes:
2049 test = ((node not in node_vol_should or
2050 volume not in node_vol_should[node]) and
2051 not reserved.Matches(volume))
2052 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2053 "volume %s is unknown", volume)
2055 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2056 """Verify N+1 Memory Resilience.
2058 Check that if one single node dies we can still start all the
2059 instances it was primary for.
2062 cluster_info = self.cfg.GetClusterInfo()
2063 for node, n_img in node_image.items():
2064 # This code checks that every node which is now listed as
2065 # secondary has enough memory to host all instances it is
2066 # supposed to, should a single other node in the cluster fail.
2067 # FIXME: not ready for failover to an arbitrary node
2068 # FIXME: does not support file-backed instances
2069 # WARNING: we currently take into account down instances as well
2070 # as up ones, considering that even if they're down someone
2071 # might want to start them even in the event of a node failure.
2073 # we're skipping offline nodes from the N+1 warning, since
2074 # most likely we don't have good memory information from them;
2075 # we already list instances living on such nodes, and that's
2076 # enough warning
2078 for prinode, instances in n_img.sbp.items():
2080 for instance in instances:
2081 bep = cluster_info.FillBE(instance_cfg[instance])
2082 if bep[constants.BE_AUTO_BALANCE]:
2083 needed_mem += bep[constants.BE_MEMORY]
2084 test = n_img.mfree < needed_mem
2085 self._ErrorIf(test, constants.CV_ENODEN1, node,
2086 "not enough memory to accomodate instance failovers"
2087 " should node %s fail (%dMiB needed, %dMiB available)",
2088 prinode, needed_mem, n_img.mfree)
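# Worked example (made-up numbers): if this node is secondary for two
# auto-balanced instances whose primary is node "node2", with BE_MEMORY of
# 1024 and 2048 MiB, the node must report at least 3072 MiB free via mfree,
# otherwise CV_ENODEN1 is raised naming "node2" as the failing primary.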
2091 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2092 (files_all, files_opt, files_mc, files_vm)):
2093 """Verifies file checksums collected from all nodes.
2095 @param errorif: Callback for reporting errors
2096 @param nodeinfo: List of L{objects.Node} objects
2097 @param master_node: Name of master node
2098 @param all_nvinfo: RPC results
2101 # Define functions determining which nodes to consider for a file
2104 (files_mc, lambda node: (node.master_candidate or
2105 node.name == master_node)),
2106 (files_vm, lambda node: node.vm_capable),
2109 # Build mapping from filename to list of nodes which should have the file
2111 for (files, fn) in files2nodefn:
2113 filenodes = nodeinfo
2115 filenodes = filter(fn, nodeinfo)
2116 nodefiles.update((filename,
2117 frozenset(map(operator.attrgetter("name"), filenodes)))
2118 for filename in files)
2120 assert set(nodefiles) == (files_all | files_mc | files_vm)
2122 fileinfo = dict((filename, {}) for filename in nodefiles)
2123 ignore_nodes = set()
2125 for node in nodeinfo:
2127 ignore_nodes.add(node.name)
2130 nresult = all_nvinfo[node.name]
2132 if nresult.fail_msg or not nresult.payload:
2135 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2137 test = not (node_files and isinstance(node_files, dict))
2138 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2139 "Node did not return file checksum data")
2141 ignore_nodes.add(node.name)
2144 # Build per-checksum mapping from filename to nodes having it
2145 for (filename, checksum) in node_files.items():
2146 assert filename in nodefiles
2147 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2149 for (filename, checksums) in fileinfo.items():
2150 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2152 # Nodes having the file
2153 with_file = frozenset(node_name
2154 for nodes in fileinfo[filename].values()
2155 for node_name in nodes) - ignore_nodes
2157 expected_nodes = nodefiles[filename] - ignore_nodes
2159 # Nodes missing file
2160 missing_file = expected_nodes - with_file
2162 if filename in files_opt:
2164 errorif(missing_file and missing_file != expected_nodes,
2165 constants.CV_ECLUSTERFILECHECK, None,
2166 "File %s is optional, but it must exist on all or no"
2167 " nodes (not found on %s)",
2168 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2170 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2171 "File %s is missing from node(s) %s", filename,
2172 utils.CommaJoin(utils.NiceSort(missing_file)))
2174 # Warn if a node has a file it shouldn't
2175 unexpected = with_file - expected_nodes
2177 constants.CV_ECLUSTERFILECHECK, None,
2178 "File %s should not exist on node(s) %s",
2179 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2181 # See if there are multiple versions of the file
2182 test = len(checksums) > 1
2184 variants = ["variant %s on %s" %
2185 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2186 for (idx, (checksum, nodes)) in
2187 enumerate(sorted(checksums.items()))]
2191 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2192 "File %s found with %s different checksums (%s)",
2193 filename, len(checksums), "; ".join(variants))
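# Illustrative shape of the structures built above (digest and node names are
# made up): nodefiles maps each filename to the frozenset of node names that
# should hold it, and fileinfo maps it to a per-checksum node set, e.g.
#   fileinfo[constants.SSH_KNOWN_HOSTS_FILE] == {"8d3b1f0a9c4e": {"node1",
#                                                                 "node2"}}
# More than one checksum key for a file yields the "different checksums" error.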
2195 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2197 """Verifies and the node DRBD status.
2199 @type ninfo: L{objects.Node}
2200 @param ninfo: the node to check
2201 @param nresult: the remote results for the node
2202 @param instanceinfo: the dict of instances
2203 @param drbd_helper: the configured DRBD usermode helper
2204 @param drbd_map: the DRBD map as returned by
2205 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2212 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2213 test = (helper_result is None)
2214 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2215 "no drbd usermode helper returned")
2217 status, payload = helper_result
2218 test = not status
2219 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2220 "drbd usermode helper check unsuccessful: %s", payload)
2221 test = status and (payload != drbd_helper)
2222 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2223 "wrong drbd usermode helper: %s", payload)
2225 # compute the DRBD minors
2226 node_drbd = {}
2227 for minor, instance in drbd_map[node].items():
2228 test = instance not in instanceinfo
2229 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2230 "ghost instance '%s' in temporary DRBD map", instance)
2231 # ghost instance should not be running, but otherwise we
2232 # don't give double warnings (both ghost instance and
2233 # unallocated minor in use)
2235 node_drbd[minor] = (instance, False)
2237 instance = instanceinfo[instance]
2238 node_drbd[minor] = (instance.name, instance.admin_up)
2240 # and now check them
2241 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2242 test = not isinstance(used_minors, (tuple, list))
2243 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2244 "cannot parse drbd status file: %s", str(used_minors))
2246 # we cannot check drbd status
2249 for minor, (iname, must_exist) in node_drbd.items():
2250 test = minor not in used_minors and must_exist
2251 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2252 "drbd minor %d of instance %s is not active", minor, iname)
2253 for minor in used_minors:
2254 test = minor not in node_drbd
2255 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2256 "unallocated drbd minor %d is in use", minor)
2258 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2259 """Builds the node OS structures.
2261 @type ninfo: L{objects.Node}
2262 @param ninfo: the node to check
2263 @param nresult: the remote results for the node
2264 @param nimg: the node image object
2268 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2270 remote_os = nresult.get(constants.NV_OSLIST, None)
2271 test = (not isinstance(remote_os, list) or
2272 not compat.all(isinstance(v, list) and len(v) == 7
2273 for v in remote_os))
2275 _ErrorIf(test, constants.CV_ENODEOS, node,
2276 "node hasn't returned valid OS data")
2285 for (name, os_path, status, diagnose,
2286 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2288 if name not in os_dict:
2291 # parameters is a list of lists instead of list of tuples due to
2292 # JSON lacking a real tuple type, fix it:
2293 parameters = [tuple(v) for v in parameters]
2294 os_dict[name].append((os_path, status, diagnose,
2295 set(variants), set(parameters), set(api_ver)))
2297 nimg.oslist = os_dict
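# Illustrative NV_OSLIST entry (values made up): a 7-element list such as
#   ["debootstrap", "/srv/ganeti/os/debootstrap", True, "", ["default"],
#    [["kernel_path", "path to the kernel"]], [20]]
# so nimg.oslist maps each OS name to the entries found for it in the
# different search-path directories, with variants, parameters and API
# versions normalized into sets above.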
2299 def _VerifyNodeOS(self, ninfo, nimg, base):
2300 """Verifies the node OS list.
2302 @type ninfo: L{objects.Node}
2303 @param ninfo: the node to check
2304 @param nimg: the node image object
2305 @param base: the 'template' node we match against (e.g. from the master)
2309 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2311 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2313 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2314 for os_name, os_data in nimg.oslist.items():
2315 assert os_data, "Empty OS status for OS %s?!" % os_name
2316 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2317 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2318 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2319 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2320 "OS '%s' has multiple entries (first one shadows the rest): %s",
2321 os_name, utils.CommaJoin([v[0] for v in os_data]))
2322 # comparisons with the 'base' image
2323 test = os_name not in base.oslist
2324 _ErrorIf(test, constants.CV_ENODEOS, node,
2325 "Extra OS %s not present on reference node (%s)",
2329 assert base.oslist[os_name], "Base node has empty OS status?"
2330 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2332 # base OS is invalid, skipping
2334 for kind, a, b in [("API version", f_api, b_api),
2335 ("variants list", f_var, b_var),
2336 ("parameters", beautify_params(f_param),
2337 beautify_params(b_param))]:
2338 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2339 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2340 kind, os_name, base.name,
2341 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2343 # check any missing OSes
2344 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2345 _ErrorIf(missing, constants.CV_ENODEOS, node,
2346 "OSes present on reference node %s but missing on this node: %s",
2347 base.name, utils.CommaJoin(missing))
2349 def _VerifyOob(self, ninfo, nresult):
2350 """Verifies out of band functionality of a node.
2352 @type ninfo: L{objects.Node}
2353 @param ninfo: the node to check
2354 @param nresult: the remote results for the node
2358 # We just have to verify the paths on master and/or master candidates
2359 # as the oob helper is invoked on the master
2360 if ((ninfo.master_candidate or ninfo.master_capable) and
2361 constants.NV_OOB_PATHS in nresult):
2362 for path_result in nresult[constants.NV_OOB_PATHS]:
2363 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2365 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2366 """Verifies and updates the node volume data.
2368 This function will update a L{NodeImage}'s internal structures
2369 with data from the remote call.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2374 @param nimg: the node image object
2375 @param vg_name: the configured VG name
2379 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2381 nimg.lvm_fail = True
2382 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2385 elif isinstance(lvdata, basestring):
2386 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2387 utils.SafeEncode(lvdata))
2388 elif not isinstance(lvdata, dict):
2389 _ErrorIf(True, constants.CV_ENODELVM, node,
2390 "rpc call to node failed (lvlist)")
2392 nimg.volumes = lvdata
2393 nimg.lvm_fail = False
2395 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2396 """Verifies and updates the node instance list.
2398 If the listing was successful, then updates this node's instance
2399 list. Otherwise, it marks the RPC call as failed for the instance
2400 list.
2402 @type ninfo: L{objects.Node}
2403 @param ninfo: the node to check
2404 @param nresult: the remote results for the node
2405 @param nimg: the node image object
2408 idata = nresult.get(constants.NV_INSTANCELIST, None)
2409 test = not isinstance(idata, list)
2410 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2411 "rpc call to node failed (instancelist): %s",
2412 utils.SafeEncode(str(idata)))
2414 nimg.hyp_fail = True
2416 nimg.instances = idata
2418 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2419 """Verifies and computes a node information map
2421 @type ninfo: L{objects.Node}
2422 @param ninfo: the node to check
2423 @param nresult: the remote results for the node
2424 @param nimg: the node image object
2425 @param vg_name: the configured VG name
2429 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2431 # try to read free memory (from the hypervisor)
2432 hv_info = nresult.get(constants.NV_HVINFO, None)
2433 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2434 _ErrorIf(test, constants.CV_ENODEHV, node,
2435 "rpc call to node failed (hvinfo)")
2438 nimg.mfree = int(hv_info["memory_free"])
2439 except (ValueError, TypeError):
2440 _ErrorIf(True, constants.CV_ENODERPC, node,
2441 "node returned invalid nodeinfo, check hypervisor")
2443 # FIXME: devise a free space model for file based instances as well
2444 if vg_name is not None:
2445 test = (constants.NV_VGLIST not in nresult or
2446 vg_name not in nresult[constants.NV_VGLIST])
2447 _ErrorIf(test, constants.CV_ENODELVM, node,
2448 "node didn't return data for the volume group '%s'"
2449 " - it is either missing or broken", vg_name)
2452 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2453 except (ValueError, TypeError):
2454 _ErrorIf(True, constants.CV_ENODERPC, node,
2455 "node returned invalid LVM info, check LVM status")
2457 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2458 """Gets per-disk status information for all instances.
2460 @type nodelist: list of strings
2461 @param nodelist: Node names
2462 @type node_image: dict of (name, L{objects.Node})
2463 @param node_image: Node objects
2464 @type instanceinfo: dict of (name, L{objects.Instance})
2465 @param instanceinfo: Instance objects
2466 @rtype: {instance: {node: [(success, payload)]}}
2467 @return: a dictionary of per-instance dictionaries with nodes as
2468 keys and disk information as values; the disk information is a
2469 list of tuples (success, payload)
2472 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2475 node_disks_devonly = {}
2476 diskless_instances = set()
2477 diskless = constants.DT_DISKLESS
2479 for nname in nodelist:
2480 node_instances = list(itertools.chain(node_image[nname].pinst,
2481 node_image[nname].sinst))
2482 diskless_instances.update(inst for inst in node_instances
2483 if instanceinfo[inst].disk_template == diskless)
2484 disks = [(inst, disk)
2485 for inst in node_instances
2486 for disk in instanceinfo[inst].disks]
2489 # No need to collect data
2492 node_disks[nname] = disks
2494 # Creating copies as SetDiskID below will modify the objects and that can
2495 # lead to incorrect data returned from nodes
2496 devonly = [dev.Copy() for (_, dev) in disks]
2499 self.cfg.SetDiskID(dev, nname)
2501 node_disks_devonly[nname] = devonly
2503 assert len(node_disks) == len(node_disks_devonly)
2505 # Collect data from all nodes with disks
2506 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2509 assert len(result) == len(node_disks)
2513 for (nname, nres) in result.items():
2514 disks = node_disks[nname]
2517 # No data from this node
2518 data = len(disks) * [(False, "node offline")]
2521 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2522 "while getting disk information: %s", msg)
2524 # No data from this node
2525 data = len(disks) * [(False, msg)]
2528 for idx, i in enumerate(nres.payload):
2529 if isinstance(i, (tuple, list)) and len(i) == 2:
2532 logging.warning("Invalid result from node %s, entry %d: %s",
2534 data.append((False, "Invalid result from the remote node"))
2536 for ((inst, _), status) in zip(disks, data):
2537 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2539 # Add empty entries for diskless instances.
2540 for inst in diskless_instances:
2541 assert inst not in instdisk
2544 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2545 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2546 compat.all(isinstance(s, (tuple, list)) and
2547 len(s) == 2 for s in statuses)
2548 for inst, nnames in instdisk.items()
2549 for nname, statuses in nnames.items())
2550 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
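# Illustrative shape of the result (names and statuses made up):
#   instdisk == {"instance1": {"node1": [(True, status0), (True, status1)],
#                              "node2": [(False, "node offline"),
#                                        (False, "node offline")]}}
# i.e. one (success, payload) pair per disk, per node, per instance.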
2555 def _SshNodeSelector(group_uuid, all_nodes):
2556 """Create endless iterators for all potential SSH check hosts.
2559 nodes = [node for node in all_nodes
2560 if (node.group != group_uuid and
2562 keyfunc = operator.attrgetter("group")
2564 return map(itertools.cycle,
2565 [sorted(map(operator.attrgetter("name"), names))
2566 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2570 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2571 """Choose which nodes should talk to which other nodes.
2573 We will make nodes contact all nodes in their group, and one node from
2574 every other group.
2576 @warning: This algorithm has a known issue if one node group is much
2577 smaller than others (e.g. just one node). In such a case all other
2578 nodes will talk to the single node.
2581 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2582 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2584 return (online_nodes,
2585 dict((name, sorted([i.next() for i in sel]))
2586 for name in online_nodes))
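# Illustrative example (group and node names made up): when verifying group
# "g1" = {A, B} in a cluster that also has "g2" = {C, D}, the returned dict
# asks every online node of "g1" to contact one node drawn from each other
# group, with the per-group itertools.cycle iterators spreading the choice of
# remote targets across consecutive callers.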
2588 def BuildHooksEnv(self):
2591 Cluster-Verify hooks are run only in the post phase; if they fail, their
2592 output is logged in the verify output and the verification fails.
2596 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2599 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2600 for node in self.my_node_info.values())
2604 def BuildHooksNodes(self):
2605 """Build hooks nodes.
2608 return ([], self.my_node_names)
2610 def Exec(self, feedback_fn):
2611 """Verify integrity of the node group, performing various test on nodes.
2614 # This method has too many local variables. pylint: disable=R0914
2615 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2617 if not self.my_node_names:
2619 feedback_fn("* Empty node group, skipping verification")
2623 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2624 verbose = self.op.verbose
2625 self._feedback_fn = feedback_fn
2627 vg_name = self.cfg.GetVGName()
2628 drbd_helper = self.cfg.GetDRBDHelper()
2629 cluster = self.cfg.GetClusterInfo()
2630 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2631 hypervisors = cluster.enabled_hypervisors
2632 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2634 i_non_redundant = [] # Non redundant instances
2635 i_non_a_balanced = [] # Non auto-balanced instances
2636 n_offline = 0 # Count of offline nodes
2637 n_drained = 0 # Count of nodes being drained
2638 node_vol_should = {}
2640 # FIXME: verify OS list
2643 filemap = _ComputeAncillaryFiles(cluster, False)
2645 # do local checksums
2646 master_node = self.master_node = self.cfg.GetMasterNode()
2647 master_ip = self.cfg.GetMasterIP()
2649 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2651 node_verify_param = {
2652 constants.NV_FILELIST:
2653 utils.UniqueSequence(filename
2654 for files in filemap
2655 for filename in files),
2656 constants.NV_NODELIST:
2657 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2658 self.all_node_info.values()),
2659 constants.NV_HYPERVISOR: hypervisors,
2660 constants.NV_HVPARAMS:
2661 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2662 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2663 for node in node_data_list
2664 if not node.offline],
2665 constants.NV_INSTANCELIST: hypervisors,
2666 constants.NV_VERSION: None,
2667 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2668 constants.NV_NODESETUP: None,
2669 constants.NV_TIME: None,
2670 constants.NV_MASTERIP: (master_node, master_ip),
2671 constants.NV_OSLIST: None,
2672 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2675 if vg_name is not None:
2676 node_verify_param[constants.NV_VGLIST] = None
2677 node_verify_param[constants.NV_LVLIST] = vg_name
2678 node_verify_param[constants.NV_PVLIST] = [vg_name]
2679 node_verify_param[constants.NV_DRBDLIST] = None
2682 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2685 # FIXME: this needs to be changed per node-group, not cluster-wide
2687 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2688 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2689 bridges.add(default_nicpp[constants.NIC_LINK])
2690 for instance in self.my_inst_info.values():
2691 for nic in instance.nics:
2692 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2693 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2694 bridges.add(full_nic[constants.NIC_LINK])
2697 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2699 # Build our expected cluster state
2700 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2702 vm_capable=node.vm_capable))
2703 for node in node_data_list)
2707 for node in self.all_node_info.values():
2708 path = _SupportsOob(self.cfg, node)
2709 if path and path not in oob_paths:
2710 oob_paths.append(path)
2713 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2715 for instance in self.my_inst_names:
2716 inst_config = self.my_inst_info[instance]
2718 for nname in inst_config.all_nodes:
2719 if nname not in node_image:
2720 gnode = self.NodeImage(name=nname)
2721 gnode.ghost = (nname not in self.all_node_info)
2722 node_image[nname] = gnode
2724 inst_config.MapLVsByNode(node_vol_should)
2726 pnode = inst_config.primary_node
2727 node_image[pnode].pinst.append(instance)
2729 for snode in inst_config.secondary_nodes:
2730 nimg = node_image[snode]
2731 nimg.sinst.append(instance)
2732 if pnode not in nimg.sbp:
2733 nimg.sbp[pnode] = []
2734 nimg.sbp[pnode].append(instance)
2736 # At this point, we have the in-memory data structures complete,
2737 # except for the runtime information, which we'll gather next
2739 # Due to the way our RPC system works, exact response times cannot be
2740 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2741 # time before and after executing the request, we can at least have a time
2742 # window.
2743 nvinfo_starttime = time.time()
2744 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2746 self.cfg.GetClusterName())
2747 nvinfo_endtime = time.time()
2749 if self.extra_lv_nodes and vg_name is not None:
2751 self.rpc.call_node_verify(self.extra_lv_nodes,
2752 {constants.NV_LVLIST: vg_name},
2753 self.cfg.GetClusterName())
2755 extra_lv_nvinfo = {}
2757 all_drbd_map = self.cfg.ComputeDRBDMap()
2759 feedback_fn("* Gathering disk information (%s nodes)" %
2760 len(self.my_node_names))
2761 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2764 feedback_fn("* Verifying configuration file consistency")
2766 # If not all nodes are being checked, we need to make sure the master node
2767 # and a non-checked vm_capable node are in the list.
2768 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2770 vf_nvinfo = all_nvinfo.copy()
2771 vf_node_info = list(self.my_node_info.values())
2772 additional_nodes = []
2773 if master_node not in self.my_node_info:
2774 additional_nodes.append(master_node)
2775 vf_node_info.append(self.all_node_info[master_node])
2776 # Add the first vm_capable node we find which is not included
2777 for node in absent_nodes:
2778 nodeinfo = self.all_node_info[node]
2779 if nodeinfo.vm_capable and not nodeinfo.offline:
2780 additional_nodes.append(node)
2781 vf_node_info.append(self.all_node_info[node])
2783 key = constants.NV_FILELIST
2784 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2785 {key: node_verify_param[key]},
2786 self.cfg.GetClusterName()))
2788 vf_nvinfo = all_nvinfo
2789 vf_node_info = self.my_node_info.values()
2791 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2793 feedback_fn("* Verifying node status")
2797 for node_i in node_data_list:
2799 nimg = node_image[node]
2803 feedback_fn("* Skipping offline node %s" % (node,))
2807 if node == master_node:
2809 elif node_i.master_candidate:
2810 ntype = "master candidate"
2811 elif node_i.drained:
2817 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2819 msg = all_nvinfo[node].fail_msg
2820 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2823 nimg.rpc_fail = True
2826 nresult = all_nvinfo[node].payload
2828 nimg.call_ok = self._VerifyNode(node_i, nresult)
2829 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2830 self._VerifyNodeNetwork(node_i, nresult)
2831 self._VerifyOob(node_i, nresult)
2834 self._VerifyNodeLVM(node_i, nresult, vg_name)
2835 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2838 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2839 self._UpdateNodeInstances(node_i, nresult, nimg)
2840 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2841 self._UpdateNodeOS(node_i, nresult, nimg)
2843 if not nimg.os_fail:
2844 if refos_img is None:
2846 self._VerifyNodeOS(node_i, nimg, refos_img)
2847 self._VerifyNodeBridges(node_i, nresult, bridges)
2849 # Check whether all running instances are primary for the node. (This
2850 # can no longer be done from _VerifyInstance below, since some of the
2851 # wrong instances could be from other node groups.)
2852 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2854 for inst in non_primary_inst:
2855 test = inst in self.all_inst_info
2856 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2857 "instance should not run on node %s", node_i.name)
2858 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2859 "node is running unknown instance %s", inst)
2861 for node, result in extra_lv_nvinfo.items():
2862 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2863 node_image[node], vg_name)
2865 feedback_fn("* Verifying instance status")
2866 for instance in self.my_inst_names:
2868 feedback_fn("* Verifying instance %s" % instance)
2869 inst_config = self.my_inst_info[instance]
2870 self._VerifyInstance(instance, inst_config, node_image,
2872 inst_nodes_offline = []
2874 pnode = inst_config.primary_node
2875 pnode_img = node_image[pnode]
2876 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2877 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2878 " primary node failed", instance)
2880 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2881 constants.CV_EINSTANCEBADNODE, instance,
2882 "instance is marked as running and lives on offline node %s",
2883 inst_config.primary_node)
2885 # If the instance is non-redundant we cannot survive losing its primary
2886 # node, so we are not N+1 compliant. On the other hand we have no disk
2887 # templates with more than one secondary so that situation is not well
2888 # supported either.
2889 # FIXME: does not support file-backed instances
2890 if not inst_config.secondary_nodes:
2891 i_non_redundant.append(instance)
2893 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2894 constants.CV_EINSTANCELAYOUT,
2895 instance, "instance has multiple secondary nodes: %s",
2896 utils.CommaJoin(inst_config.secondary_nodes),
2897 code=self.ETYPE_WARNING)
2899 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2900 pnode = inst_config.primary_node
2901 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2902 instance_groups = {}
2904 for node in instance_nodes:
2905 instance_groups.setdefault(self.all_node_info[node].group,
2909 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2910 # Sort so that we always list the primary node first.
2911 for group, nodes in sorted(instance_groups.items(),
2912 key=lambda (_, nodes): pnode in nodes,
2915 self._ErrorIf(len(instance_groups) > 1,
2916 constants.CV_EINSTANCESPLITGROUPS,
2917 instance, "instance has primary and secondary nodes in"
2918 " different groups: %s", utils.CommaJoin(pretty_list),
2919 code=self.ETYPE_WARNING)
2921 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2922 i_non_a_balanced.append(instance)
2924 for snode in inst_config.secondary_nodes:
2925 s_img = node_image[snode]
2926 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2927 snode, "instance %s, connection to secondary node failed",
2931 inst_nodes_offline.append(snode)
2933 # warn that the instance lives on offline nodes
2934 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2935 "instance has offline secondary node(s) %s",
2936 utils.CommaJoin(inst_nodes_offline))
2937 # ... or ghost/non-vm_capable nodes
2938 for node in inst_config.all_nodes:
2939 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2940 instance, "instance lives on ghost node %s", node)
2941 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2942 instance, "instance lives on non-vm_capable node %s", node)
2944 feedback_fn("* Verifying orphan volumes")
2945 reserved = utils.FieldSet(*cluster.reserved_lvs)
2947 # We will get spurious "unknown volume" warnings if any node of this group
2948 # is secondary for an instance whose primary is in another group. To avoid
2949 # them, we find these instances and add their volumes to node_vol_should.
2950 for inst in self.all_inst_info.values():
2951 for secondary in inst.secondary_nodes:
2952 if (secondary in self.my_node_info
2953 and inst.name not in self.my_inst_info):
2954 inst.MapLVsByNode(node_vol_should)
2957 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2959 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2960 feedback_fn("* Verifying N+1 Memory redundancy")
2961 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2963 feedback_fn("* Other Notes")
2965 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2966 % len(i_non_redundant))
2968 if i_non_a_balanced:
2969 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2970 % len(i_non_a_balanced))
2973 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2976 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2980 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2981 """Analyze the post-hooks' result
2983 This method analyses the hook result, handles it, and sends some
2984 nicely-formatted feedback back to the user.
2986 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2987 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2988 @param hooks_results: the results of the multi-node hooks rpc call
2989 @param feedback_fn: function used to send feedback back to the caller
2990 @param lu_result: previous Exec result
2991 @return: the new Exec result, based on the previous result
2995 # We only really run POST phase hooks, only for non-empty groups,
2996 # and are only interested in their results
2997 if not self.my_node_names:
3000 elif phase == constants.HOOKS_PHASE_POST:
3001 # Used to change hooks' output to proper indentation
3002 feedback_fn("* Hooks Results")
3003 assert hooks_results, "invalid result from hooks"
3005 for node_name in hooks_results:
3006 res = hooks_results[node_name]
3008 test = msg and not res.offline
3009 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3010 "Communication failure in hooks execution: %s", msg)
3011 if res.offline or msg:
3012 # No need to investigate payload if node is offline or gave
3013 # an error.
3015 for script, hkr, output in res.payload:
3016 test = hkr == constants.HKR_FAIL
3017 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3018 "Script %s failed, output:", script)
3020 output = self._HOOKS_INDENT_RE.sub(" ", output)
3021 feedback_fn("%s" % output)
3027 class LUClusterVerifyDisks(NoHooksLU):
3028 """Verifies the cluster disks status.
3033 def ExpandNames(self):
3034 self.share_locks = _ShareAll()
3035 self.needed_locks = {
3036 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3039 def Exec(self, feedback_fn):
3040 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3042 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3043 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3044 for group in group_names])
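# Illustrative example (group names made up): on a cluster with node groups
# "default" and "ssd" this submits two independent jobs, each wrapping a
# single OpGroupVerifyDisks opcode for one group.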
3047 class LUGroupVerifyDisks(NoHooksLU):
3048 """Verifies the status of all disks in a node group.
3053 def ExpandNames(self):
3054 # Raises errors.OpPrereqError on its own if group can't be found
3055 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3057 self.share_locks = _ShareAll()
3058 self.needed_locks = {
3059 locking.LEVEL_INSTANCE: [],
3060 locking.LEVEL_NODEGROUP: [],
3061 locking.LEVEL_NODE: [],
3064 def DeclareLocks(self, level):
3065 if level == locking.LEVEL_INSTANCE:
3066 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3068 # Lock instances optimistically, needs verification once node and group
3069 # locks have been acquired
3070 self.needed_locks[locking.LEVEL_INSTANCE] = \
3071 self.cfg.GetNodeGroupInstances(self.group_uuid)
3073 elif level == locking.LEVEL_NODEGROUP:
3074 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3076 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3077 set([self.group_uuid] +
3078 # Lock all groups used by instances optimistically; this requires
3079 # going via the node before it's locked, requiring verification
3082 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3083 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3085 elif level == locking.LEVEL_NODE:
3086 # This will only lock the nodes in the group to be verified which contain
3087 # actual instances
3088 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3089 self._LockInstancesNodes()
3091 # Lock all nodes in group to be verified
3092 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3093 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3094 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3096 def CheckPrereq(self):
3097 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3098 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3099 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3101 assert self.group_uuid in owned_groups
3103 # Check if locked instances are still correct
3104 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3106 # Get instance information
3107 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3109 # Check if node groups for locked instances are still correct
3110 for (instance_name, inst) in self.instances.items():
3111 assert owned_nodes.issuperset(inst.all_nodes), \
3112 "Instance %s's nodes changed while we kept the lock" % instance_name
3114 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3117 assert self.group_uuid in inst_groups, \
3118 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3120 def Exec(self, feedback_fn):
3121 """Verify integrity of cluster disks.
3123 @rtype: tuple of three items
3124 @return: a tuple of (dict of node-to-node_error, list of instances
3125 which need activate-disks, dict of instance: (node, volume) for
3126 missing volumes
3130 res_instances = set()
3133 nv_dict = _MapInstanceDisksToNodes([inst
3134 for inst in self.instances.values()
3138 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3139 set(self.cfg.GetVmCapableNodeList()))
3141 node_lvs = self.rpc.call_lv_list(nodes, [])
3143 for (node, node_res) in node_lvs.items():
3144 if node_res.offline:
3147 msg = node_res.fail_msg
3149 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3150 res_nodes[node] = msg
3153 for lv_name, (_, _, lv_online) in node_res.payload.items():
3154 inst = nv_dict.pop((node, lv_name), None)
3155 if not (lv_online or inst is None):
3156 res_instances.add(inst)
3158 # any leftover items in nv_dict are missing LVs, let's arrange the data
3160 for key, inst in nv_dict.iteritems():
3161 res_missing.setdefault(inst, []).append(list(key))
3163 return (res_nodes, list(res_instances), res_missing)
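# Illustrative return value (node, instance and volume names made up):
#   ({"node3": "Connection failed"}, ["instance2"],
#    {"instance5": [["node1", "xenvg/disk0"]]})
# i.e. per-node errors, instances needing activate-disks, and missing LVs.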
3166 class LUClusterRepairDiskSizes(NoHooksLU):
3167 """Verifies the cluster disks sizes.
3172 def ExpandNames(self):
3173 if self.op.instances:
3174 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3175 self.needed_locks = {
3176 locking.LEVEL_NODE: [],
3177 locking.LEVEL_INSTANCE: self.wanted_names,
3179 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3181 self.wanted_names = None
3182 self.needed_locks = {
3183 locking.LEVEL_NODE: locking.ALL_SET,
3184 locking.LEVEL_INSTANCE: locking.ALL_SET,
3186 self.share_locks = _ShareAll()
3188 def DeclareLocks(self, level):
3189 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3190 self._LockInstancesNodes(primary_only=True)
3192 def CheckPrereq(self):
3193 """Check prerequisites.
3195 This only checks the optional instance list against the existing names.
3198 if self.wanted_names is None:
3199 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3201 self.wanted_instances = \
3202 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3204 def _EnsureChildSizes(self, disk):
3205 """Ensure children of the disk have the needed disk size.
3207 This is valid mainly for DRBD8 and fixes an issue where the
3208 children have smaller disk size.
3210 @param disk: an L{ganeti.objects.Disk} object
3213 if disk.dev_type == constants.LD_DRBD8:
3214 assert disk.children, "Empty children for DRBD8?"
3215 fchild = disk.children[0]
3216 mismatch = fchild.size < disk.size
3218 self.LogInfo("Child disk has size %d, parent %d, fixing",
3219 fchild.size, disk.size)
3220 fchild.size = disk.size
3222 # and we recurse on this child only, not on the metadev
3223 return self._EnsureChildSizes(fchild) or mismatch
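# Illustrative example (sizes made up): a DRBD8 disk recorded at 10240 whose
# data child reports 10176 gets the child grown to 10240 and True is returned,
# signalling to Exec below that the configuration needs to be written out.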
3227 def Exec(self, feedback_fn):
3228 """Verify the size of cluster disks.
3231 # TODO: check child disks too
3232 # TODO: check differences in size between primary/secondary nodes
3234 for instance in self.wanted_instances:
3235 pnode = instance.primary_node
3236 if pnode not in per_node_disks:
3237 per_node_disks[pnode] = []
3238 for idx, disk in enumerate(instance.disks):
3239 per_node_disks[pnode].append((instance, idx, disk))
3242 for node, dskl in per_node_disks.items():
3243 newl = [v[2].Copy() for v in dskl]
3245 self.cfg.SetDiskID(dsk, node)
3246 result = self.rpc.call_blockdev_getsize(node, newl)
3248 self.LogWarning("Failure in blockdev_getsize call to node"
3249 " %s, ignoring", node)
3251 if len(result.payload) != len(dskl):
3252 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3253 " result.payload=%s", node, len(dskl), result.payload)
3254 self.LogWarning("Invalid result from node %s, ignoring node results",
3257 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3259 self.LogWarning("Disk %d of instance %s did not return size"
3260 " information, ignoring", idx, instance.name)
3262 if not isinstance(size, (int, long)):
3263 self.LogWarning("Disk %d of instance %s did not return valid"
3264 " size information, ignoring", idx, instance.name)
3267 if size != disk.size:
3268 self.LogInfo("Disk %d of instance %s has mismatched size,"
3269 " correcting: recorded %d, actual %d", idx,
3270 instance.name, disk.size, size)
3272 self.cfg.Update(instance, feedback_fn)
3273 changed.append((instance.name, idx, size))
3274 if self._EnsureChildSizes(disk):
3275 self.cfg.Update(instance, feedback_fn)
3276 changed.append((instance.name, idx, disk.size))
3280 class LUClusterRename(LogicalUnit):
3281 """Rename the cluster.
3284 HPATH = "cluster-rename"
3285 HTYPE = constants.HTYPE_CLUSTER
3287 def BuildHooksEnv(self):
3292 "OP_TARGET": self.cfg.GetClusterName(),
3293 "NEW_NAME": self.op.name,
3296 def BuildHooksNodes(self):
3297 """Build hooks nodes.
3300 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3302 def CheckPrereq(self):
3303 """Verify that the passed name is a valid one.
3306 hostname = netutils.GetHostname(name=self.op.name,
3307 family=self.cfg.GetPrimaryIPFamily())
3309 new_name = hostname.name
3310 self.ip = new_ip = hostname.ip
3311 old_name = self.cfg.GetClusterName()
3312 old_ip = self.cfg.GetMasterIP()
3313 if new_name == old_name and new_ip == old_ip:
3314 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3315 " cluster has changed",
3317 if new_ip != old_ip:
3318 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3319 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3320 " reachable on the network" %
3321 new_ip, errors.ECODE_NOTUNIQUE)
3323 self.op.name = new_name
3325 def Exec(self, feedback_fn):
3326 """Rename the cluster.
3329 clustername = self.op.name
3332 # shutdown the master IP
3333 master_params = self.cfg.GetMasterNetworkParameters()
3334 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3336 master_params.netmask,
3337 master_params.netdev,
3338 master_params.ip_family)
3339 result.Raise("Could not disable the master role")
3342 cluster = self.cfg.GetClusterInfo()
3343 cluster.cluster_name = clustername
3344 cluster.master_ip = new_ip
3345 self.cfg.Update(cluster, feedback_fn)
3347 # update the known hosts file
3348 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3349 node_list = self.cfg.GetOnlineNodeList()
3351 node_list.remove(master_params.name)
3354 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3356 result = self.rpc.call_node_activate_master_ip(master_params.name,
3358 master_params.netmask,
3359 master_params.netdev,
3360 master_params.ip_family)
3361 msg = result.fail_msg
3363 self.LogWarning("Could not re-enable the master role on"
3364 " the master, please restart manually: %s", msg)
3369 def _ValidateNetmask(cfg, netmask):
3370 """Checks if a netmask is valid.
3372 @type cfg: L{config.ConfigWriter}
3373 @param cfg: The cluster configuration
3375 @param netmask: the netmask to be verified
3376 @raise errors.OpPrereqError: if the validation fails
3379 ip_family = cfg.GetPrimaryIPFamily()
3381 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3382 except errors.ProgrammerError:
3383 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3385 if not ipcls.ValidateNetmask(netmask):
3386 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3390 class LUClusterSetParams(LogicalUnit):
3391 """Change the parameters of the cluster.
3394 HPATH = "cluster-modify"
3395 HTYPE = constants.HTYPE_CLUSTER
3398 def CheckArguments(self):
3402 if self.op.uid_pool:
3403 uidpool.CheckUidPool(self.op.uid_pool)
3405 if self.op.add_uids:
3406 uidpool.CheckUidPool(self.op.add_uids)
3408 if self.op.remove_uids:
3409 uidpool.CheckUidPool(self.op.remove_uids)
3411 if self.op.master_netmask is not None:
3412 _ValidateNetmask(self.cfg, self.op.master_netmask)
3414 def ExpandNames(self):
3415 # FIXME: in the future maybe other cluster params won't require checking on
3416 # all nodes to be modified.
3417 self.needed_locks = {
3418 locking.LEVEL_NODE: locking.ALL_SET,
3420 self.share_locks[locking.LEVEL_NODE] = 1
3422 def BuildHooksEnv(self):
3427 "OP_TARGET": self.cfg.GetClusterName(),
3428 "NEW_VG_NAME": self.op.vg_name,
3431 def BuildHooksNodes(self):
3432 """Build hooks nodes.
3435 mn = self.cfg.GetMasterNode()
3438 def CheckPrereq(self):
3439 """Check prerequisites.
3441 This checks that the given params don't conflict and
3442 that the given volume group is valid.
3445 if self.op.vg_name is not None and not self.op.vg_name:
3446 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3447 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3448 " instances exist", errors.ECODE_INVAL)
3450 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3451 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3452 raise errors.OpPrereqError("Cannot disable drbd helper while"
3453 " drbd-based instances exist",
3456 node_list = self.owned_locks(locking.LEVEL_NODE)
3458 # if vg_name not None, checks given volume group on all nodes
3460 vglist = self.rpc.call_vg_list(node_list)
3461 for node in node_list:
3462 msg = vglist[node].fail_msg
3464 # ignoring down node
3465 self.LogWarning("Error while gathering data on node %s"
3466 " (ignoring node): %s", node, msg)
3468 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3470 constants.MIN_VG_SIZE)
3472 raise errors.OpPrereqError("Error on node '%s': %s" %
3473 (node, vgstatus), errors.ECODE_ENVIRON)
3475 if self.op.drbd_helper:
3476 # checks given drbd helper on all nodes
3477 helpers = self.rpc.call_drbd_helper(node_list)
3478 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3480 self.LogInfo("Not checking drbd helper on offline node %s", node)
3482 msg = helpers[node].fail_msg
3484 raise errors.OpPrereqError("Error checking drbd helper on node"
3485 " '%s': %s" % (node, msg),
3486 errors.ECODE_ENVIRON)
3487 node_helper = helpers[node].payload
3488 if node_helper != self.op.drbd_helper:
3489 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3490 (node, node_helper), errors.ECODE_ENVIRON)
3492 self.cluster = cluster = self.cfg.GetClusterInfo()
3493 # validate params changes
3494 if self.op.beparams:
3495 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3496 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3498 if self.op.ndparams:
3499 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3500 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3502 # TODO: we need a more general way to handle resetting
3503 # cluster-level parameters to default values
3504 if self.new_ndparams["oob_program"] == "":
3505 self.new_ndparams["oob_program"] = \
3506 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3508 if self.op.nicparams:
3509 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3510 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3511 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3514 # check all instances for consistency
3515 for instance in self.cfg.GetAllInstancesInfo().values():
3516 for nic_idx, nic in enumerate(instance.nics):
3517 params_copy = copy.deepcopy(nic.nicparams)
3518 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3520 # check parameter syntax
3522 objects.NIC.CheckParameterSyntax(params_filled)
3523 except errors.ConfigurationError, err:
3524 nic_errors.append("Instance %s, nic/%d: %s" %
3525 (instance.name, nic_idx, err))
3527 # if we're moving instances to routed, check that they have an ip
3528 target_mode = params_filled[constants.NIC_MODE]
3529 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3530 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3531 " address" % (instance.name, nic_idx))
3533 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3534 "\n".join(nic_errors))
3536 # hypervisor list/parameters
3537 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3538 if self.op.hvparams:
3539 for hv_name, hv_dict in self.op.hvparams.items():
3540 if hv_name not in self.new_hvparams:
3541 self.new_hvparams[hv_name] = hv_dict
3543 self.new_hvparams[hv_name].update(hv_dict)
3545 # os hypervisor parameters
3546 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3548 for os_name, hvs in self.op.os_hvp.items():
3549 if os_name not in self.new_os_hvp:
3550 self.new_os_hvp[os_name] = hvs
3552 for hv_name, hv_dict in hvs.items():
3553 if hv_name not in self.new_os_hvp[os_name]:
3554 self.new_os_hvp[os_name][hv_name] = hv_dict
3556 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3559 self.new_osp = objects.FillDict(cluster.osparams, {})
3560 if self.op.osparams:
3561 for os_name, osp in self.op.osparams.items():
3562 if os_name not in self.new_osp:
3563 self.new_osp[os_name] = {}
3565 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3568 if not self.new_osp[os_name]:
3569 # we removed all parameters
3570 del self.new_osp[os_name]
3572 # check the parameter validity (remote check)
3573 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3574 os_name, self.new_osp[os_name])
3576 # changes to the hypervisor list
3577 if self.op.enabled_hypervisors is not None:
3578 self.hv_list = self.op.enabled_hypervisors
3579 for hv in self.hv_list:
3580 # if the hypervisor doesn't already exist in the cluster
3581 # hvparams, we initialize it to empty, and then (in both
3582 # cases) we make sure to fill the defaults, as we might not
3583 # have a complete defaults list if the hypervisor wasn't
3584 # enabled before
3585 if hv not in new_hvp:
3587 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3588 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3590 self.hv_list = cluster.enabled_hypervisors
3592 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3593 # either the enabled list has changed, or the parameters have, validate
3594 for hv_name, hv_params in self.new_hvparams.items():
3595 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3596 (self.op.enabled_hypervisors and
3597 hv_name in self.op.enabled_hypervisors)):
3598 # either this is a new hypervisor, or its parameters have changed
3599 hv_class = hypervisor.GetHypervisor(hv_name)
3600 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3601 hv_class.CheckParameterSyntax(hv_params)
3602 _CheckHVParams(self, node_list, hv_name, hv_params)
3605 # no need to check any newly-enabled hypervisors, since the
3606 # defaults have already been checked in the above code-block
3607 for os_name, os_hvp in self.new_os_hvp.items():
3608 for hv_name, hv_params in os_hvp.items():
3609 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3610 # we need to fill in the new os_hvp on top of the actual hv_p
3611 cluster_defaults = self.new_hvparams.get(hv_name, {})
3612 new_osp = objects.FillDict(cluster_defaults, hv_params)
3613 hv_class = hypervisor.GetHypervisor(hv_name)
3614 hv_class.CheckParameterSyntax(new_osp)
3615 _CheckHVParams(self, node_list, hv_name, new_osp)
3617 if self.op.default_iallocator:
3618 alloc_script = utils.FindFile(self.op.default_iallocator,
3619 constants.IALLOCATOR_SEARCH_PATH,
3621 if alloc_script is None:
3622 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3623 " specified" % self.op.default_iallocator,
3626 def Exec(self, feedback_fn):
3627 """Change the parameters of the cluster.
3630 if self.op.vg_name is not None:
3631 new_volume = self.op.vg_name
3634 if new_volume != self.cfg.GetVGName():
3635 self.cfg.SetVGName(new_volume)
3637 feedback_fn("Cluster LVM configuration already in desired"
3638 " state, not changing")
3639 if self.op.drbd_helper is not None:
3640 new_helper = self.op.drbd_helper
3643 if new_helper != self.cfg.GetDRBDHelper():
3644 self.cfg.SetDRBDHelper(new_helper)
3646 feedback_fn("Cluster DRBD helper already in desired state,"
3648 if self.op.hvparams:
3649 self.cluster.hvparams = self.new_hvparams
3651 self.cluster.os_hvp = self.new_os_hvp
3652 if self.op.enabled_hypervisors is not None:
3653 self.cluster.hvparams = self.new_hvparams
3654 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3655 if self.op.beparams:
3656 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3657 if self.op.nicparams:
3658 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3659 if self.op.osparams:
3660 self.cluster.osparams = self.new_osp
3661 if self.op.ndparams:
3662 self.cluster.ndparams = self.new_ndparams
3664 if self.op.candidate_pool_size is not None:
3665 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3666 # we need to update the pool size here, otherwise the save will fail
3667 _AdjustCandidatePool(self, [])
3669 if self.op.maintain_node_health is not None:
3670 self.cluster.maintain_node_health = self.op.maintain_node_health
3672 if self.op.prealloc_wipe_disks is not None:
3673 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3675 if self.op.add_uids is not None:
3676 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3678 if self.op.remove_uids is not None:
3679 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3681 if self.op.uid_pool is not None:
3682 self.cluster.uid_pool = self.op.uid_pool
3684 if self.op.default_iallocator is not None:
3685 self.cluster.default_iallocator = self.op.default_iallocator
3687 if self.op.reserved_lvs is not None:
3688 self.cluster.reserved_lvs = self.op.reserved_lvs
3690 def helper_os(aname, mods, desc):
3692 lst = getattr(self.cluster, aname)
3693 for key, val in mods:
3694 if key == constants.DDM_ADD:
3696 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3699 elif key == constants.DDM_REMOVE:
3703 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3705 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3707 if self.op.hidden_os:
3708 helper_os("hidden_os", self.op.hidden_os, "hidden")
3710 if self.op.blacklisted_os:
3711 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3713 if self.op.master_netdev:
3714 master_params = self.cfg.GetMasterNetworkParameters()
3715 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3716 self.cluster.master_netdev)
3717 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3719 master_params.netmask,
3720 master_params.netdev,
3721 master_params.ip_family)
3722 result.Raise("Could not disable the master ip")
3723 feedback_fn("Changing master_netdev from %s to %s" %
3724 (master_params.netdev, self.op.master_netdev))
3725 self.cluster.master_netdev = self.op.master_netdev
3727 if self.op.master_netmask:
3728 master_params = self.cfg.GetMasterNetworkParameters()
3729 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3730 result = self.rpc.call_node_change_master_netmask(master_params.name,
3731 master_params.netmask,
3732 self.op.master_netmask,
3734 master_params.netdev)
3736 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3737 self.LogWarning(msg)
3740 self.cluster.master_netmask = self.op.master_netmask
3742 self.cfg.Update(self.cluster, feedback_fn)
3744 if self.op.master_netdev:
3745 master_params = self.cfg.GetMasterNetworkParameters()
3746 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3747 self.op.master_netdev)
3748 result = self.rpc.call_node_activate_master_ip(master_params.name,
3750 master_params.netmask,
3751 master_params.netdev,
3752 master_params.ip_family)
3754 self.LogWarning("Could not re-enable the master ip on"
3755 " the master, please restart manually: %s",
3759 def _UploadHelper(lu, nodes, fname):
3760 """Helper for uploading a file and showing warnings.
3763 if os.path.exists(fname):
3764 result = lu.rpc.call_upload_file(nodes, fname)
3765 for to_node, to_result in result.items():
3766 msg = to_result.fail_msg
3768 msg = ("Copy of file %s to node %s failed: %s" %
3769 (fname, to_node, msg))
3770 lu.proc.LogWarning(msg)
3773 def _ComputeAncillaryFiles(cluster, redist):
3774 """Compute files external to Ganeti which need to be consistent.
3776 @type redist: boolean
3777 @param redist: Whether to include files which need to be redistributed
3780 # Compute files for all nodes
3782 constants.SSH_KNOWN_HOSTS_FILE,
3783 constants.CONFD_HMAC_KEY,
3784 constants.CLUSTER_DOMAIN_SECRET_FILE,
3785 constants.SPICE_CERT_FILE,
3786 constants.SPICE_CACERT_FILE,
3787 constants.RAPI_USERS_FILE,
3791 files_all.update(constants.ALL_CERT_FILES)
3792 files_all.update(ssconf.SimpleStore().GetFileList())
3794 # we need to ship at least the RAPI certificate
3795 files_all.add(constants.RAPI_CERT_FILE)
3797 if cluster.modify_etc_hosts:
3798 files_all.add(constants.ETC_HOSTS)
3800 # Files which are optional; these must:
3801 # - be present in one other category as well
3802 # - either exist or not exist on all nodes of that category (mc, vm all)
3804 constants.RAPI_USERS_FILE,
3807 # Files which should only be on master candidates
3810 files_mc.add(constants.CLUSTER_CONF_FILE)
3812 # Files which should only be on VM-capable nodes
3813 files_vm = set(filename
3814 for hv_name in cluster.enabled_hypervisors
3815 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3817 files_opt |= set(filename
3818 for hv_name in cluster.enabled_hypervisors
3819 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3821 # Filenames in each category must be unique
3822 all_files_set = files_all | files_mc | files_vm
3823 assert (len(all_files_set) ==
3824 sum(map(len, [files_all, files_mc, files_vm]))), \
3825 "Found file listed in more than one file list"
3827 # Optional files must be present in one other category
3828 assert all_files_set.issuperset(files_opt), \
3829 "Optional file not in a different required list"
3831 return (files_all, files_opt, files_mc, files_vm)
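# The two assertions above are plain set arithmetic: the categories must be
# disjoint and optional files must also appear in some category. A minimal
# self-contained sketch of the same invariants, using made-up file names
# rather than the real constants:
#
#   files_all = set(["known_hosts", "rapi.pem", "rapi_users"])
#   files_mc = set(["config.data"])
#   files_vm = set(["kvm-ifup"])
#   files_opt = set(["rapi_users"])      # optional, but also in files_all
#   combined = files_all | files_mc | files_vm
#   assert len(combined) == sum(map(len, [files_all, files_mc, files_vm]))
#   assert combined.issuperset(files_opt)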
3834 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3835 """Distribute additional files which are part of the cluster configuration.
3837 ConfigWriter takes care of distributing the config and ssconf files, but
3838 there are more files which should be distributed to all nodes. This function
3839 makes sure those are copied.
3841 @param lu: calling logical unit
3842 @param additional_nodes: list of nodes not in the config to distribute to
3843 @type additional_vm: boolean
3844 @param additional_vm: whether the additional nodes are vm-capable or not
3847 # Gather target nodes
3848 cluster = lu.cfg.GetClusterInfo()
3849 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3851 online_nodes = lu.cfg.GetOnlineNodeList()
3852 vm_nodes = lu.cfg.GetVmCapableNodeList()
3854 if additional_nodes is not None:
3855 online_nodes.extend(additional_nodes)
3857 vm_nodes.extend(additional_nodes)
3859 # Never distribute to master node
3860 for nodelist in [online_nodes, vm_nodes]:
3861 if master_info.name in nodelist:
3862 nodelist.remove(master_info.name)
3865 (files_all, _, files_mc, files_vm) = \
3866 _ComputeAncillaryFiles(cluster, True)
3868 # Never re-distribute configuration file from here
3869 assert not (constants.CLUSTER_CONF_FILE in files_all or
3870 constants.CLUSTER_CONF_FILE in files_vm)
3871 assert not files_mc, "Master candidates not handled in this function"
3874 (online_nodes, files_all),
3875 (vm_nodes, files_vm),
3879 for (node_list, files) in filemap:
3881 _UploadHelper(lu, node_list, fname)
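# Callers simply pass themselves in: LUClusterRedistConf below calls
# _RedistributeAncillaryFiles(self) after updating the configuration, and
# LUNodeAdd additionally names the node being added. A hedged usage sketch
# (the node name is made up):
#
#   _RedistributeAncillaryFiles(lu)
#   _RedistributeAncillaryFiles(lu, additional_nodes=["node9.example.com"],
#                               additional_vm=False)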
3884 class LUClusterRedistConf(NoHooksLU):
3885 """Force the redistribution of cluster configuration.
3887 This is a very simple LU.
3892 def ExpandNames(self):
3893 self.needed_locks = {
3894 locking.LEVEL_NODE: locking.ALL_SET,
3896 self.share_locks[locking.LEVEL_NODE] = 1
3898 def Exec(self, feedback_fn):
3899 """Redistribute the configuration.
3902 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3903 _RedistributeAncillaryFiles(self)
3906 class LUClusterActivateMasterIp(NoHooksLU):
3907 """Activate the master IP on the master node.
3910 def Exec(self, feedback_fn):
3911 """Activate the master IP.
3914 master_params = self.cfg.GetMasterNetworkParameters()
3915 self.rpc.call_node_activate_master_ip(master_params.name,
3917 master_params.netmask,
3918 master_params.netdev,
3919 master_params.ip_family)
3922 class LUClusterDeactivateMasterIp(NoHooksLU):
3923 """Deactivate the master IP on the master node.
3926 def Exec(self, feedback_fn):
3927 """Deactivate the master IP.
3930 master_params = self.cfg.GetMasterNetworkParameters()
3931 self.rpc.call_node_deactivate_master_ip(master_params.name,
3933 master_params.netmask,
3934 master_params.netdev,
3935 master_params.ip_family)
3938 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3939 """Sleep and poll for an instance's disk to sync.
3942 if not instance.disks or disks is not None and not disks:
3945 disks = _ExpandCheckDisks(instance, disks)
3948 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3950 node = instance.primary_node
3953 lu.cfg.SetDiskID(dev, node)
3955 # TODO: Convert to utils.Retry
3958 degr_retries = 10 # in seconds, as we sleep 1 second each time
3962 cumul_degraded = False
3963 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3964 msg = rstats.fail_msg
3966 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3969 raise errors.RemoteError("Can't contact node %s for mirror data,"
3970 " aborting." % node)
3973 rstats = rstats.payload
3975 for i, mstat in enumerate(rstats):
3977 lu.LogWarning("Can't compute data for node %s/%s",
3978 node, disks[i].iv_name)
3981 cumul_degraded = (cumul_degraded or
3982 (mstat.is_degraded and mstat.sync_percent is None))
3983 if mstat.sync_percent is not None:
3985 if mstat.estimated_time is not None:
3986 rem_time = ("%s remaining (estimated)" %
3987 utils.FormatSeconds(mstat.estimated_time))
3988 max_time = mstat.estimated_time
3990 rem_time = "no time estimate"
3991 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3992 (disks[i].iv_name, mstat.sync_percent, rem_time))
3994 # if we're done but degraded, let's do a few small retries, to
3995 # make sure we see a stable and not transient situation; therefore
3996 # we force restart of the loop
3997 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3998 logging.info("Degraded disks found, %d retries left", degr_retries)
4006 time.sleep(min(60, max_time))
4009 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4010 return not cumul_degraded
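# Stripped of the RPC details, the waiting loop above is a poll-and-retry
# pattern in which a "done but degraded" answer is given a short grace
# period before being trusted. A simplified sketch (poll() and estimate are
# placeholders, not real helpers):
#
#   retries = 10
#   while True:
#     done, degraded = poll()
#     if done and degraded and retries > 0:
#       retries -= 1                  # transient degradation, look again
#       time.sleep(1)
#       continue
#     if done:
#       return not degraded
#     time.sleep(min(60, estimate))   # back off while the sync progresses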
4013 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4014 """Check that mirrors are not degraded.
4016 The ldisk parameter, if True, will change the test from the
4017 is_degraded attribute (which represents overall non-ok status for
4018 the device(s)) to the ldisk (representing the local storage status).
4021 lu.cfg.SetDiskID(dev, node)
4025 if on_primary or dev.AssembleOnSecondary():
4026 rstats = lu.rpc.call_blockdev_find(node, dev)
4027 msg = rstats.fail_msg
4029 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4031 elif not rstats.payload:
4032 lu.LogWarning("Can't find disk on node %s", node)
4036 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4038 result = result and not rstats.payload.is_degraded
4041 for child in dev.children:
4042 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
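# For a DRBD disk backed by two logical volumes the check recurses through
# the device tree, e.g. (illustrative call sequence only):
#
#   _CheckDiskConsistency(lu, drbd_dev, node, on_primary=True)
#     -> call_blockdev_find(node, drbd_dev), check is_degraded
#        (or ldisk_status == constants.LDS_OKAY when ldisk=True)
#     -> _CheckDiskConsistency(lu, lv_data, node, True)
#     -> _CheckDiskConsistency(lu, lv_meta, node, True)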
4047 class LUOobCommand(NoHooksLU):
4048 """Logical unit for OOB handling.
4052 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4054 def ExpandNames(self):
4055 """Gather locks we need.
4058 if self.op.node_names:
4059 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4060 lock_names = self.op.node_names
4062 lock_names = locking.ALL_SET
4064 self.needed_locks = {
4065 locking.LEVEL_NODE: lock_names,
4068 def CheckPrereq(self):
4069 """Check prerequisites.
4072 - the node exists in the configuration
4075 Any errors are signaled by raising errors.OpPrereqError.
4079 self.master_node = self.cfg.GetMasterNode()
4081 assert self.op.power_delay >= 0.0
4083 if self.op.node_names:
4084 if (self.op.command in self._SKIP_MASTER and
4085 self.master_node in self.op.node_names):
4086 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4087 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4089 if master_oob_handler:
4090 additional_text = ("run '%s %s %s' if you want to operate on the"
4091 " master regardless") % (master_oob_handler,
4095 additional_text = "it does not support out-of-band operations"
4097 raise errors.OpPrereqError(("Operating on the master node %s is not"
4098 " allowed for %s; %s") %
4099 (self.master_node, self.op.command,
4100 additional_text), errors.ECODE_INVAL)
4102 self.op.node_names = self.cfg.GetNodeList()
4103 if self.op.command in self._SKIP_MASTER:
4104 self.op.node_names.remove(self.master_node)
4106 if self.op.command in self._SKIP_MASTER:
4107 assert self.master_node not in self.op.node_names
4109 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4111 raise errors.OpPrereqError("Node %s not found" % node_name,
4114 self.nodes.append(node)
4116 if (not self.op.ignore_status and
4117 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4118 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4119 " not marked offline") % node_name,
4122 def Exec(self, feedback_fn):
4123 """Execute OOB and return result if we expect any.
4126 master_node = self.master_node
4129 for idx, node in enumerate(utils.NiceSort(self.nodes,
4130 key=lambda node: node.name)):
4131 node_entry = [(constants.RS_NORMAL, node.name)]
4132 ret.append(node_entry)
4134 oob_program = _SupportsOob(self.cfg, node)
4137 node_entry.append((constants.RS_UNAVAIL, None))
4140 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4141 self.op.command, oob_program, node.name)
4142 result = self.rpc.call_run_oob(master_node, oob_program,
4143 self.op.command, node.name,
4147 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4148 node.name, result.fail_msg)
4149 node_entry.append((constants.RS_NODATA, None))
4152 self._CheckPayload(result)
4153 except errors.OpExecError, err:
4154 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4156 node_entry.append((constants.RS_NODATA, None))
4158 if self.op.command == constants.OOB_HEALTH:
4159 # For health we should log important events
4160 for item, status in result.payload:
4161 if status in [constants.OOB_STATUS_WARNING,
4162 constants.OOB_STATUS_CRITICAL]:
4163 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4164 item, node.name, status)
4166 if self.op.command == constants.OOB_POWER_ON:
4168 elif self.op.command == constants.OOB_POWER_OFF:
4169 node.powered = False
4170 elif self.op.command == constants.OOB_POWER_STATUS:
4171 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4172 if powered != node.powered:
4173 logging.warning(("Recorded power state (%s) of node '%s' does not"
4174 " match actual power state (%s)"), node.powered,
4177 # For configuration changing commands we should update the node
4178 if self.op.command in (constants.OOB_POWER_ON,
4179 constants.OOB_POWER_OFF):
4180 self.cfg.Update(node, feedback_fn)
4182 node_entry.append((constants.RS_NORMAL, result.payload))
4184 if (self.op.command == constants.OOB_POWER_ON and
4185 idx < len(self.nodes) - 1):
4186 time.sleep(self.op.power_delay)
4190 def _CheckPayload(self, result):
4191 """Checks if the payload is valid.
4193 @param result: RPC result
4194 @raises errors.OpExecError: If payload is not valid
4198 if self.op.command == constants.OOB_HEALTH:
4199 if not isinstance(result.payload, list):
4200 errs.append("command 'health' is expected to return a list but got %s" %
4201 type(result.payload))
4203 for item, status in result.payload:
4204 if status not in constants.OOB_STATUSES:
4205 errs.append("health item '%s' has invalid status '%s'" %
4208 if self.op.command == constants.OOB_POWER_STATUS:
4209 if not isinstance(result.payload, dict):
4210 errs.append("power-status is expected to return a dict but got %s" %
4211 type(result.payload))
4213 if self.op.command in [
4214 constants.OOB_POWER_ON,
4215 constants.OOB_POWER_OFF,
4216 constants.OOB_POWER_CYCLE,
4218 if result.payload is not None:
4219 errs.append("%s is expected to not return payload but got '%s'" %
4220 (self.op.command, result.payload))
4223 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4224 utils.CommaJoin(errs))
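# The payload shapes being validated are, for illustration (the item names
# and values below are hypothetical; keys and statuses come from constants):
#
#   OOB_HEALTH              -> [["psu0", "OK"], ["fan1", "WARNING"]]  # list
#   OOB_POWER_STATUS        -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE  -> None                             # no payload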
4227 class _OsQuery(_QueryBase):
4228 FIELDS = query.OS_FIELDS
4230 def ExpandNames(self, lu):
4231 # Lock all nodes in shared mode
4232 # Temporary removal of locks, should be reverted later
4233 # TODO: reintroduce locks when they are lighter-weight
4234 lu.needed_locks = {}
4235 #self.share_locks[locking.LEVEL_NODE] = 1
4236 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4238 # The following variables interact with _QueryBase._GetNames
4240 self.wanted = self.names
4242 self.wanted = locking.ALL_SET
4244 self.do_locking = self.use_locking
4246 def DeclareLocks(self, lu, level):
4250 def _DiagnoseByOS(rlist):
4251 """Remaps a per-node return list into an a per-os per-node dictionary
4253 @param rlist: a map with node names as keys and OS objects as values
4256 @return: a dictionary with osnames as keys and as value another
4257 map, with nodes as keys and tuples of (path, status, diagnose,
4258 variants, parameters, api_versions) as values, eg::
4260 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4261 (/srv/..., False, "invalid api")],
4262 "node2": [(/srv/..., True, "", [], [])]}
4267 # we build here the list of nodes that didn't fail the RPC (at RPC
4268 # level), so that nodes with a non-responding node daemon don't
4269 # make all OSes invalid
4270 good_nodes = [node_name for node_name in rlist
4271 if not rlist[node_name].fail_msg]
4272 for node_name, nr in rlist.items():
4273 if nr.fail_msg or not nr.payload:
4275 for (name, path, status, diagnose, variants,
4276 params, api_versions) in nr.payload:
4277 if name not in all_os:
4278 # build a list of nodes for this os containing empty lists
4279 # for each node in node_list
4281 for nname in good_nodes:
4282 all_os[name][nname] = []
4283 # convert params from [name, help] to (name, help)
4284 params = [tuple(v) for v in params]
4285 all_os[name][node_name].append((path, status, diagnose,
4286 variants, params, api_versions))
4289 def _GetQueryData(self, lu):
4290 """Computes the list of nodes and their attributes.
4293 # Locking is not used
4294 assert not (compat.any(lu.glm.is_owned(level)
4295 for level in locking.LEVELS
4296 if level != locking.LEVEL_CLUSTER) or
4297 self.do_locking or self.use_locking)
4299 valid_nodes = [node.name
4300 for node in lu.cfg.GetAllNodesInfo().values()
4301 if not node.offline and node.vm_capable]
4302 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4303 cluster = lu.cfg.GetClusterInfo()
4307 for (os_name, os_data) in pol.items():
4308 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4309 hidden=(os_name in cluster.hidden_os),
4310 blacklisted=(os_name in cluster.blacklisted_os))
4314 api_versions = set()
4316 for idx, osl in enumerate(os_data.values()):
4317 info.valid = bool(info.valid and osl and osl[0][1])
4321 (node_variants, node_params, node_api) = osl[0][3:6]
4324 variants.update(node_variants)
4325 parameters.update(node_params)
4326 api_versions.update(node_api)
4328 # Filter out inconsistent values
4329 variants.intersection_update(node_variants)
4330 parameters.intersection_update(node_params)
4331 api_versions.intersection_update(node_api)
4333 info.variants = list(variants)
4334 info.parameters = list(parameters)
4335 info.api_versions = list(api_versions)
4337 data[os_name] = info
4339 # Prepare data in requested order
4340 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4344 class LUOsDiagnose(NoHooksLU):
4345 """Logical unit for OS diagnose/query.
4351 def _BuildFilter(fields, names):
4352 """Builds a filter for querying OSes.
4355 name_filter = qlang.MakeSimpleFilter("name", names)
4357 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4358 # respective field is not requested
4359 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4360 for fname in ["hidden", "blacklisted"]
4361 if fname not in fields]
4362 if "valid" not in fields:
4363 status_filter.append([qlang.OP_TRUE, "valid"])
4366 status_filter.insert(0, qlang.OP_AND)
4368 status_filter = None
4370 if name_filter and status_filter:
4371 return [qlang.OP_AND, name_filter, status_filter]
4375 return status_filter
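# For example, _BuildFilter(["name", "variants"], ["lenny-image"]) yields
# roughly the following qlang expression (sketch; the exact shape of the
# name part depends on qlang.MakeSimpleFilter):
#
#   [OP_AND,
#    <name filter matching "lenny-image">,
#    [OP_AND,
#     [OP_NOT, [OP_TRUE, "hidden"]],
#     [OP_NOT, [OP_TRUE, "blacklisted"]],
#     [OP_TRUE, "valid"]]]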
4377 def CheckArguments(self):
4378 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4379 self.op.output_fields, False)
4381 def ExpandNames(self):
4382 self.oq.ExpandNames(self)
4384 def Exec(self, feedback_fn):
4385 return self.oq.OldStyleQuery(self)
4388 class LUNodeRemove(LogicalUnit):
4389 """Logical unit for removing a node.
4392 HPATH = "node-remove"
4393 HTYPE = constants.HTYPE_NODE
4395 def BuildHooksEnv(self):
4398 This doesn't run on the target node in the pre phase as a failed
4399 node would then be impossible to remove.
4403 "OP_TARGET": self.op.node_name,
4404 "NODE_NAME": self.op.node_name,
4407 def BuildHooksNodes(self):
4408 """Build hooks nodes.
4411 all_nodes = self.cfg.GetNodeList()
4413 all_nodes.remove(self.op.node_name)
4415 logging.warning("Node '%s', which is about to be removed, was not found"
4416 " in the list of all nodes", self.op.node_name)
4417 return (all_nodes, all_nodes)
4419 def CheckPrereq(self):
4420 """Check prerequisites.
4423 - the node exists in the configuration
4424 - it does not have primary or secondary instances
4425 - it's not the master
4427 Any errors are signaled by raising errors.OpPrereqError.
4430 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4431 node = self.cfg.GetNodeInfo(self.op.node_name)
4432 assert node is not None
4434 masternode = self.cfg.GetMasterNode()
4435 if node.name == masternode:
4436 raise errors.OpPrereqError("Node is the master node, failover to another"
4437 " node is required", errors.ECODE_INVAL)
4439 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4440 if node.name in instance.all_nodes:
4441 raise errors.OpPrereqError("Instance %s is still running on the node,"
4442 " please remove first" % instance_name,
4444 self.op.node_name = node.name
4447 def Exec(self, feedback_fn):
4448 """Removes the node from the cluster.
4452 logging.info("Stopping the node daemon and removing configs from node %s",
4455 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4457 # Promote nodes to master candidate as needed
4458 _AdjustCandidatePool(self, exceptions=[node.name])
4459 self.context.RemoveNode(node.name)
4461 # Run post hooks on the node before it's removed
4462 _RunPostHook(self, node.name)
4464 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4465 msg = result.fail_msg
4467 self.LogWarning("Errors encountered on the remote node while leaving"
4468 " the cluster: %s", msg)
4470 # Remove node from our /etc/hosts
4471 if self.cfg.GetClusterInfo().modify_etc_hosts:
4472 master_node = self.cfg.GetMasterNode()
4473 result = self.rpc.call_etc_hosts_modify(master_node,
4474 constants.ETC_HOSTS_REMOVE,
4476 result.Raise("Can't update hosts file with new host data")
4477 _RedistributeAncillaryFiles(self)
4480 class _NodeQuery(_QueryBase):
4481 FIELDS = query.NODE_FIELDS
4483 def ExpandNames(self, lu):
4484 lu.needed_locks = {}
4485 lu.share_locks = _ShareAll()
4488 self.wanted = _GetWantedNodes(lu, self.names)
4490 self.wanted = locking.ALL_SET
4492 self.do_locking = (self.use_locking and
4493 query.NQ_LIVE in self.requested_data)
4496 # If any non-static field is requested we need to lock the nodes
4497 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4499 def DeclareLocks(self, lu, level):
4502 def _GetQueryData(self, lu):
4503 """Computes the list of nodes and their attributes.
4506 all_info = lu.cfg.GetAllNodesInfo()
4508 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4510 # Gather data as requested
4511 if query.NQ_LIVE in self.requested_data:
4512 # filter out non-vm_capable nodes
4513 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4515 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4516 lu.cfg.GetHypervisorType())
4517 live_data = dict((name, nresult.payload)
4518 for (name, nresult) in node_data.items()
4519 if not nresult.fail_msg and nresult.payload)
4523 if query.NQ_INST in self.requested_data:
4524 node_to_primary = dict([(name, set()) for name in nodenames])
4525 node_to_secondary = dict([(name, set()) for name in nodenames])
4527 inst_data = lu.cfg.GetAllInstancesInfo()
4529 for inst in inst_data.values():
4530 if inst.primary_node in node_to_primary:
4531 node_to_primary[inst.primary_node].add(inst.name)
4532 for secnode in inst.secondary_nodes:
4533 if secnode in node_to_secondary:
4534 node_to_secondary[secnode].add(inst.name)
4536 node_to_primary = None
4537 node_to_secondary = None
4539 if query.NQ_OOB in self.requested_data:
4540 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4541 for name, node in all_info.iteritems())
4545 if query.NQ_GROUP in self.requested_data:
4546 groups = lu.cfg.GetAllNodeGroupsInfo()
4550 return query.NodeQueryData([all_info[name] for name in nodenames],
4551 live_data, lu.cfg.GetMasterNode(),
4552 node_to_primary, node_to_secondary, groups,
4553 oob_support, lu.cfg.GetClusterInfo())
4556 class LUNodeQuery(NoHooksLU):
4557 """Logical unit for querying nodes.
4560 # pylint: disable=W0142
4563 def CheckArguments(self):
4564 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4565 self.op.output_fields, self.op.use_locking)
4567 def ExpandNames(self):
4568 self.nq.ExpandNames(self)
4570 def Exec(self, feedback_fn):
4571 return self.nq.OldStyleQuery(self)
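# LUOsDiagnose above and LUNodeQuery here are thin wrappers around a
# _QueryBase subclass: build the query object in CheckArguments, delegate
# lock handling in ExpandNames and run it in Exec. A hedged outline of such
# a wrapper (LUFooQuery/_FooQuery are hypothetical names):
#
#   class LUFooQuery(NoHooksLU):
#     def CheckArguments(self):
#       self.fq = _FooQuery(qlang.MakeSimpleFilter("name", self.op.names),
#                           self.op.output_fields, self.op.use_locking)
#     def ExpandNames(self):
#       self.fq.ExpandNames(self)
#     def Exec(self, feedback_fn):
#       return self.fq.OldStyleQuery(self)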
4574 class LUNodeQueryvols(NoHooksLU):
4575 """Logical unit for getting volumes on node(s).
4579 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4580 _FIELDS_STATIC = utils.FieldSet("node")
4582 def CheckArguments(self):
4583 _CheckOutputFields(static=self._FIELDS_STATIC,
4584 dynamic=self._FIELDS_DYNAMIC,
4585 selected=self.op.output_fields)
4587 def ExpandNames(self):
4588 self.needed_locks = {}
4589 self.share_locks[locking.LEVEL_NODE] = 1
4590 if not self.op.nodes:
4591 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4593 self.needed_locks[locking.LEVEL_NODE] = \
4594 _GetWantedNodes(self, self.op.nodes)
4596 def Exec(self, feedback_fn):
4597 """Computes the list of nodes and their attributes.
4600 nodenames = self.owned_locks(locking.LEVEL_NODE)
4601 volumes = self.rpc.call_node_volumes(nodenames)
4603 ilist = self.cfg.GetAllInstancesInfo()
4604 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4607 for node in nodenames:
4608 nresult = volumes[node]
4611 msg = nresult.fail_msg
4613 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4616 node_vols = sorted(nresult.payload,
4617 key=operator.itemgetter("dev"))
4619 for vol in node_vols:
4621 for field in self.op.output_fields:
4624 elif field == "phys":
4628 elif field == "name":
4630 elif field == "size":
4631 val = int(float(vol["size"]))
4632 elif field == "instance":
4633 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4635 raise errors.ParameterError(field)
4636 node_output.append(str(val))
4638 output.append(node_output)
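# The instance lookup key is the (node, "vg/lv") pair built above; e.g. an
# LV "xenvg/disk0" on node1 maps back to its owning instance roughly as
# (illustrative names):
#
#   vol2inst[("node1.example.com", "xenvg/disk0")] == "instance1.example.com"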
4643 class LUNodeQueryStorage(NoHooksLU):
4644 """Logical unit for getting information on storage units on node(s).
4647 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4650 def CheckArguments(self):
4651 _CheckOutputFields(static=self._FIELDS_STATIC,
4652 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4653 selected=self.op.output_fields)
4655 def ExpandNames(self):
4656 self.needed_locks = {}
4657 self.share_locks[locking.LEVEL_NODE] = 1
4660 self.needed_locks[locking.LEVEL_NODE] = \
4661 _GetWantedNodes(self, self.op.nodes)
4663 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4665 def Exec(self, feedback_fn):
4666 """Computes the list of nodes and their attributes.
4669 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4671 # Always get name to sort by
4672 if constants.SF_NAME in self.op.output_fields:
4673 fields = self.op.output_fields[:]
4675 fields = [constants.SF_NAME] + self.op.output_fields
4677 # Never ask for node or type as it's only known to the LU
4678 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4679 while extra in fields:
4680 fields.remove(extra)
4682 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4683 name_idx = field_idx[constants.SF_NAME]
4685 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4686 data = self.rpc.call_storage_list(self.nodes,
4687 self.op.storage_type, st_args,
4688 self.op.name, fields)
4692 for node in utils.NiceSort(self.nodes):
4693 nresult = data[node]
4697 msg = nresult.fail_msg
4699 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4702 rows = dict([(row[name_idx], row) for row in nresult.payload])
4704 for name in utils.NiceSort(rows.keys()):
4709 for field in self.op.output_fields:
4710 if field == constants.SF_NODE:
4712 elif field == constants.SF_TYPE:
4713 val = self.op.storage_type
4714 elif field in field_idx:
4715 val = row[field_idx[field]]
4717 raise errors.ParameterError(field)
4726 class _InstanceQuery(_QueryBase):
4727 FIELDS = query.INSTANCE_FIELDS
4729 def ExpandNames(self, lu):
4730 lu.needed_locks = {}
4731 lu.share_locks = _ShareAll()
4734 self.wanted = _GetWantedInstances(lu, self.names)
4736 self.wanted = locking.ALL_SET
4738 self.do_locking = (self.use_locking and
4739 query.IQ_LIVE in self.requested_data)
4741 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4742 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4743 lu.needed_locks[locking.LEVEL_NODE] = []
4744 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4746 self.do_grouplocks = (self.do_locking and
4747 query.IQ_NODES in self.requested_data)
4749 def DeclareLocks(self, lu, level):
4751 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4752 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4754 # Lock all groups used by instances optimistically; this requires going
4755 # via the node before it's locked, requiring verification later on
4756 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4758 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4759 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4760 elif level == locking.LEVEL_NODE:
4761 lu._LockInstancesNodes() # pylint: disable=W0212
4764 def _CheckGroupLocks(lu):
4765 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4766 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4768 # Check if node groups for locked instances are still correct
4769 for instance_name in owned_instances:
4770 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4772 def _GetQueryData(self, lu):
4773 """Computes the list of instances and their attributes.
4776 if self.do_grouplocks:
4777 self._CheckGroupLocks(lu)
4779 cluster = lu.cfg.GetClusterInfo()
4780 all_info = lu.cfg.GetAllInstancesInfo()
4782 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4784 instance_list = [all_info[name] for name in instance_names]
4785 nodes = frozenset(itertools.chain(*(inst.all_nodes
4786 for inst in instance_list)))
4787 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4790 wrongnode_inst = set()
4792 # Gather data as requested
4793 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4795 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4797 result = node_data[name]
4799 # offline nodes will be in both lists
4800 assert result.fail_msg
4801 offline_nodes.append(name)
4803 bad_nodes.append(name)
4804 elif result.payload:
4805 for inst in result.payload:
4806 if inst in all_info:
4807 if all_info[inst].primary_node == name:
4808 live_data.update(result.payload)
4810 wrongnode_inst.add(inst)
4812 # orphan instance; we don't list it here as we don't
4813 # handle this case yet in the output of instance listing
4814 logging.warning("Orphan instance '%s' found on node %s",
4816 # else no instance is alive
4820 if query.IQ_DISKUSAGE in self.requested_data:
4821 disk_usage = dict((inst.name,
4822 _ComputeDiskSize(inst.disk_template,
4823 [{constants.IDISK_SIZE: disk.size}
4824 for disk in inst.disks]))
4825 for inst in instance_list)
4829 if query.IQ_CONSOLE in self.requested_data:
4831 for inst in instance_list:
4832 if inst.name in live_data:
4833 # Instance is running
4834 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4836 consinfo[inst.name] = None
4837 assert set(consinfo.keys()) == set(instance_names)
4841 if query.IQ_NODES in self.requested_data:
4842 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4844 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4845 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4846 for uuid in set(map(operator.attrgetter("group"),
4852 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4853 disk_usage, offline_nodes, bad_nodes,
4854 live_data, wrongnode_inst, consinfo,
4858 class LUQuery(NoHooksLU):
4859 """Query for resources/items of a certain kind.
4862 # pylint: disable=W0142
4865 def CheckArguments(self):
4866 qcls = _GetQueryImplementation(self.op.what)
4868 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4870 def ExpandNames(self):
4871 self.impl.ExpandNames(self)
4873 def DeclareLocks(self, level):
4874 self.impl.DeclareLocks(self, level)
4876 def Exec(self, feedback_fn):
4877 return self.impl.NewStyleQuery(self)
4880 class LUQueryFields(NoHooksLU):
4881 """Query for resources/items of a certain kind.
4884 # pylint: disable=W0142
4887 def CheckArguments(self):
4888 self.qcls = _GetQueryImplementation(self.op.what)
4890 def ExpandNames(self):
4891 self.needed_locks = {}
4893 def Exec(self, feedback_fn):
4894 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4897 class LUNodeModifyStorage(NoHooksLU):
4898 """Logical unit for modifying a storage volume on a node.
4903 def CheckArguments(self):
4904 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4906 storage_type = self.op.storage_type
4909 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4911 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4912 " modified" % storage_type,
4915 diff = set(self.op.changes.keys()) - modifiable
4917 raise errors.OpPrereqError("The following fields can not be modified for"
4918 " storage units of type '%s': %r" %
4919 (storage_type, list(diff)),
4922 def ExpandNames(self):
4923 self.needed_locks = {
4924 locking.LEVEL_NODE: self.op.node_name,
4927 def Exec(self, feedback_fn):
4928 """Computes the list of nodes and their attributes.
4931 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4932 result = self.rpc.call_storage_modify(self.op.node_name,
4933 self.op.storage_type, st_args,
4934 self.op.name, self.op.changes)
4935 result.Raise("Failed to modify storage unit '%s' on %s" %
4936 (self.op.name, self.op.node_name))
4939 class LUNodeAdd(LogicalUnit):
4940 """Logical unit for adding node to the cluster.
4944 HTYPE = constants.HTYPE_NODE
4945 _NFLAGS = ["master_capable", "vm_capable"]
4947 def CheckArguments(self):
4948 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4949 # validate/normalize the node name
4950 self.hostname = netutils.GetHostname(name=self.op.node_name,
4951 family=self.primary_ip_family)
4952 self.op.node_name = self.hostname.name
4954 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4955 raise errors.OpPrereqError("Cannot readd the master node",
4958 if self.op.readd and self.op.group:
4959 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4960 " being readded", errors.ECODE_INVAL)
4962 def BuildHooksEnv(self):
4965 This will run on all nodes before, and on all nodes + the new node after.
4969 "OP_TARGET": self.op.node_name,
4970 "NODE_NAME": self.op.node_name,
4971 "NODE_PIP": self.op.primary_ip,
4972 "NODE_SIP": self.op.secondary_ip,
4973 "MASTER_CAPABLE": str(self.op.master_capable),
4974 "VM_CAPABLE": str(self.op.vm_capable),
4977 def BuildHooksNodes(self):
4978 """Build hooks nodes.
4981 # Exclude added node
4982 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4983 post_nodes = pre_nodes + [self.op.node_name, ]
4985 return (pre_nodes, post_nodes)
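# For example, when adding "node4" to a cluster of node1..node3 (names are
# illustrative), the hook node lists come out as (ignoring ordering):
#
#   pre_nodes  == ["node1", "node2", "node3"]
#   post_nodes == ["node1", "node2", "node3", "node4"]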
4987 def CheckPrereq(self):
4988 """Check prerequisites.
4991 - the new node is not already in the config
4993 - its parameters (single/dual homed) match the cluster
4995 Any errors are signaled by raising errors.OpPrereqError.
4999 hostname = self.hostname
5000 node = hostname.name
5001 primary_ip = self.op.primary_ip = hostname.ip
5002 if self.op.secondary_ip is None:
5003 if self.primary_ip_family == netutils.IP6Address.family:
5004 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5005 " IPv4 address must be given as secondary",
5007 self.op.secondary_ip = primary_ip
5009 secondary_ip = self.op.secondary_ip
5010 if not netutils.IP4Address.IsValid(secondary_ip):
5011 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5012 " address" % secondary_ip, errors.ECODE_INVAL)
5014 node_list = cfg.GetNodeList()
5015 if not self.op.readd and node in node_list:
5016 raise errors.OpPrereqError("Node %s is already in the configuration" %
5017 node, errors.ECODE_EXISTS)
5018 elif self.op.readd and node not in node_list:
5019 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5022 self.changed_primary_ip = False
5024 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5025 if self.op.readd and node == existing_node_name:
5026 if existing_node.secondary_ip != secondary_ip:
5027 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5028 " address configuration as before",
5030 if existing_node.primary_ip != primary_ip:
5031 self.changed_primary_ip = True
5035 if (existing_node.primary_ip == primary_ip or
5036 existing_node.secondary_ip == primary_ip or
5037 existing_node.primary_ip == secondary_ip or
5038 existing_node.secondary_ip == secondary_ip):
5039 raise errors.OpPrereqError("New node ip address(es) conflict with"
5040 " existing node %s" % existing_node.name,
5041 errors.ECODE_NOTUNIQUE)
5043 # After this 'if' block, None is no longer a valid value for the
5044 # _capable op attributes
5046 old_node = self.cfg.GetNodeInfo(node)
5047 assert old_node is not None, "Can't retrieve locked node %s" % node
5048 for attr in self._NFLAGS:
5049 if getattr(self.op, attr) is None:
5050 setattr(self.op, attr, getattr(old_node, attr))
5052 for attr in self._NFLAGS:
5053 if getattr(self.op, attr) is None:
5054 setattr(self.op, attr, True)
5056 if self.op.readd and not self.op.vm_capable:
5057 pri, sec = cfg.GetNodeInstances(node)
5059 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5060 " flag set to false, but it already holds"
5061 " instances" % node,
5064 # check that the type of the node (single versus dual homed) is the
5065 # same as for the master
5066 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5067 master_singlehomed = myself.secondary_ip == myself.primary_ip
5068 newbie_singlehomed = secondary_ip == primary_ip
5069 if master_singlehomed != newbie_singlehomed:
5070 if master_singlehomed:
5071 raise errors.OpPrereqError("The master has no secondary ip but the"
5072 " new node has one",
5075 raise errors.OpPrereqError("The master has a secondary ip but the"
5076 " new node doesn't have one",
5079 # checks reachability
5080 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5081 raise errors.OpPrereqError("Node not reachable by ping",
5082 errors.ECODE_ENVIRON)
5084 if not newbie_singlehomed:
5085 # check reachability from my secondary ip to newbie's secondary ip
5086 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5087 source=myself.secondary_ip):
5088 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5089 " based ping to node daemon port",
5090 errors.ECODE_ENVIRON)
5097 if self.op.master_capable:
5098 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5100 self.master_candidate = False
5103 self.new_node = old_node
5105 node_group = cfg.LookupNodeGroup(self.op.group)
5106 self.new_node = objects.Node(name=node,
5107 primary_ip=primary_ip,
5108 secondary_ip=secondary_ip,
5109 master_candidate=self.master_candidate,
5110 offline=False, drained=False,
5113 if self.op.ndparams:
5114 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5116 def Exec(self, feedback_fn):
5117 """Adds the new node to the cluster.
5120 new_node = self.new_node
5121 node = new_node.name
5123 # We are adding a new node, so we assume it is powered
5124 new_node.powered = True
5126 # for re-adds, reset the offline/drained/master-candidate flags;
5127 # we need to reset here, otherwise offline would prevent RPC calls
5128 # later in the procedure; this also means that if the re-add
5129 # fails, we are left with a non-offlined, broken node
5131 new_node.drained = new_node.offline = False # pylint: disable=W0201
5132 self.LogInfo("Readding a node, the offline/drained flags were reset")
5133 # if we demote the node, we do cleanup later in the procedure
5134 new_node.master_candidate = self.master_candidate
5135 if self.changed_primary_ip:
5136 new_node.primary_ip = self.op.primary_ip
5138 # copy the master/vm_capable flags
5139 for attr in self._NFLAGS:
5140 setattr(new_node, attr, getattr(self.op, attr))
5142 # notify the user about any possible mc promotion
5143 if new_node.master_candidate:
5144 self.LogInfo("Node will be a master candidate")
5146 if self.op.ndparams:
5147 new_node.ndparams = self.op.ndparams
5149 new_node.ndparams = {}
5151 # check connectivity
5152 result = self.rpc.call_version([node])[node]
5153 result.Raise("Can't get version information from node %s" % node)
5154 if constants.PROTOCOL_VERSION == result.payload:
5155 logging.info("Communication to node %s fine, sw version %s match",
5156 node, result.payload)
5158 raise errors.OpExecError("Version mismatch master version %s,"
5159 " node version %s" %
5160 (constants.PROTOCOL_VERSION, result.payload))
5162 # Add node to our /etc/hosts, and add key to known_hosts
5163 if self.cfg.GetClusterInfo().modify_etc_hosts:
5164 master_node = self.cfg.GetMasterNode()
5165 result = self.rpc.call_etc_hosts_modify(master_node,
5166 constants.ETC_HOSTS_ADD,
5169 result.Raise("Can't update hosts file with new host data")
5171 if new_node.secondary_ip != new_node.primary_ip:
5172 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5175 node_verify_list = [self.cfg.GetMasterNode()]
5176 node_verify_param = {
5177 constants.NV_NODELIST: ([node], {}),
5178 # TODO: do a node-net-test as well?
5181 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5182 self.cfg.GetClusterName())
5183 for verifier in node_verify_list:
5184 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5185 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5187 for failed in nl_payload:
5188 feedback_fn("ssh/hostname verification failed"
5189 " (checking from %s): %s" %
5190 (verifier, nl_payload[failed]))
5191 raise errors.OpExecError("ssh/hostname verification failed")
5194 _RedistributeAncillaryFiles(self)
5195 self.context.ReaddNode(new_node)
5196 # make sure we redistribute the config
5197 self.cfg.Update(new_node, feedback_fn)
5198 # and make sure the new node will not have old files around
5199 if not new_node.master_candidate:
5200 result = self.rpc.call_node_demote_from_mc(new_node.name)
5201 msg = result.fail_msg
5203 self.LogWarning("Node failed to demote itself from master"
5204 " candidate status: %s" % msg)
5206 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5207 additional_vm=self.op.vm_capable)
5208 self.context.AddNode(new_node, self.proc.GetECId())
5211 class LUNodeSetParams(LogicalUnit):
5212 """Modifies the parameters of a node.
5214 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5215 to the node role (as _ROLE_*)
5216 @cvar _R2F: a dictionary from node role to tuples of flags
5217 @cvar _FLAGS: a list of attribute names corresponding to the flags
5220 HPATH = "node-modify"
5221 HTYPE = constants.HTYPE_NODE
5223 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5225 (True, False, False): _ROLE_CANDIDATE,
5226 (False, True, False): _ROLE_DRAINED,
5227 (False, False, True): _ROLE_OFFLINE,
5228 (False, False, False): _ROLE_REGULAR,
5230 _R2F = dict((v, k) for k, v in _F2R.items())
5231 _FLAGS = ["master_candidate", "drained", "offline"]
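# A worked example of the role mapping: a node with
# (master_candidate=True, drained=False, offline=False) maps via _F2R to
# _ROLE_CANDIDATE, and _R2F[_ROLE_DRAINED] gives back (False, True, False).
# Combinations with more than one flag set are deliberately absent from
# _F2R, which is what the "at most one state" checks below rely on.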
5233 def CheckArguments(self):
5234 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5235 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5236 self.op.master_capable, self.op.vm_capable,
5237 self.op.secondary_ip, self.op.ndparams]
5238 if all_mods.count(None) == len(all_mods):
5239 raise errors.OpPrereqError("Please pass at least one modification",
5241 if all_mods.count(True) > 1:
5242 raise errors.OpPrereqError("Can't set the node into more than one"
5243 " state at the same time",
5246 # Boolean value that tells us whether we might be demoting from MC
5247 self.might_demote = (self.op.master_candidate == False or
5248 self.op.offline == True or
5249 self.op.drained == True or
5250 self.op.master_capable == False)
5252 if self.op.secondary_ip:
5253 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5254 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5255 " address" % self.op.secondary_ip,
5258 self.lock_all = self.op.auto_promote and self.might_demote
5259 self.lock_instances = self.op.secondary_ip is not None
5261 def ExpandNames(self):
5263 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5265 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5267 if self.lock_instances:
5268 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5270 def DeclareLocks(self, level):
5271 # If we have locked all instances, before waiting to lock nodes, release
5272 # all the ones living on nodes unrelated to the current operation.
5273 if level == locking.LEVEL_NODE and self.lock_instances:
5274 self.affected_instances = []
5275 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5278 # Build list of instances to release
5279 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5280 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5281 if (instance.disk_template in constants.DTS_INT_MIRROR and
5282 self.op.node_name in instance.all_nodes):
5283 instances_keep.append(instance_name)
5284 self.affected_instances.append(instance)
5286 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5288 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5289 set(instances_keep))
5291 def BuildHooksEnv(self):
5294 This runs on the master node.
5298 "OP_TARGET": self.op.node_name,
5299 "MASTER_CANDIDATE": str(self.op.master_candidate),
5300 "OFFLINE": str(self.op.offline),
5301 "DRAINED": str(self.op.drained),
5302 "MASTER_CAPABLE": str(self.op.master_capable),
5303 "VM_CAPABLE": str(self.op.vm_capable),
5306 def BuildHooksNodes(self):
5307 """Build hooks nodes.
5310 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5313 def CheckPrereq(self):
5314 """Check prerequisites.
5316 This only checks the instance list against the existing names.
5319 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5321 if (self.op.master_candidate is not None or
5322 self.op.drained is not None or
5323 self.op.offline is not None):
5324 # we can't change the master's node flags
5325 if self.op.node_name == self.cfg.GetMasterNode():
5326 raise errors.OpPrereqError("The master role can be changed"
5327 " only via master-failover",
5330 if self.op.master_candidate and not node.master_capable:
5331 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5332 " it a master candidate" % node.name,
5335 if self.op.vm_capable == False:
5336 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5338 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5339 " the vm_capable flag" % node.name,
5342 if node.master_candidate and self.might_demote and not self.lock_all:
5343 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5344 # check if after removing the current node, we're missing master
5346 (mc_remaining, mc_should, _) = \
5347 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5348 if mc_remaining < mc_should:
5349 raise errors.OpPrereqError("Not enough master candidates, please"
5350 " pass auto promote option to allow"
5351 " promotion", errors.ECODE_STATE)
5353 self.old_flags = old_flags = (node.master_candidate,
5354 node.drained, node.offline)
5355 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5356 self.old_role = old_role = self._F2R[old_flags]
5358 # Check for ineffective changes
5359 for attr in self._FLAGS:
5360 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5361 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5362 setattr(self.op, attr, None)
5364 # Past this point, any flag change to False means a transition
5365 # away from the respective state, as only real changes are kept
5367 # TODO: We might query the real power state if it supports OOB
5368 if _SupportsOob(self.cfg, node):
5369 if self.op.offline is False and not (node.powered or
5370 self.op.powered == True):
5371 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5372 " offline status can be reset") %
5374 elif self.op.powered is not None:
5375 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5376 " as it does not support out-of-band"
5377 " handling") % self.op.node_name)
5379 # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
5380 if (self.op.drained == False or self.op.offline == False or
5381 (self.op.master_capable and not node.master_capable)):
5382 if _DecideSelfPromotion(self):
5383 self.op.master_candidate = True
5384 self.LogInfo("Auto-promoting node to master candidate")
5386 # If we're no longer master capable, we'll demote ourselves from MC
5387 if self.op.master_capable == False and node.master_candidate:
5388 self.LogInfo("Demoting from master candidate")
5389 self.op.master_candidate = False
5392 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5393 if self.op.master_candidate:
5394 new_role = self._ROLE_CANDIDATE
5395 elif self.op.drained:
5396 new_role = self._ROLE_DRAINED
5397 elif self.op.offline:
5398 new_role = self._ROLE_OFFLINE
5399 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5400 # False is still in new flags, which means we're un-setting (the
5402 new_role = self._ROLE_REGULAR
5403 else: # no new flags, nothing, keep old role
5406 self.new_role = new_role
5408 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5409 # Trying to transition out of offline status
5410 # TODO: Use standard RPC runner, but make sure it works when the node is
5411 # still marked offline
5412 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5414 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5415 " to report its version: %s" %
5416 (node.name, result.fail_msg),
5419 self.LogWarning("Transitioning node from offline to online state"
5420 " without using re-add. Please make sure the node"
5423 if self.op.secondary_ip:
5424 # Ok even without locking, because this can't be changed by any LU
5425 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5426 master_singlehomed = master.secondary_ip == master.primary_ip
5427 if master_singlehomed and self.op.secondary_ip:
5428 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5429 " homed cluster", errors.ECODE_INVAL)
5432 if self.affected_instances:
5433 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5434 " node has instances (%s) configured"
5435 " to use it" % self.affected_instances)
5437 # On online nodes, check that no instances are running, and that
5438 # the node has the new ip and we can reach it.
5439 for instance in self.affected_instances:
5440 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5442 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5443 if master.name != node.name:
5444 # check reachability from master secondary ip to new secondary ip
5445 if not netutils.TcpPing(self.op.secondary_ip,
5446 constants.DEFAULT_NODED_PORT,
5447 source=master.secondary_ip):
5448 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5449 " based ping to node daemon port",
5450 errors.ECODE_ENVIRON)
5452 if self.op.ndparams:
5453 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5454 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5455 self.new_ndparams = new_ndparams
5457 def Exec(self, feedback_fn):
5462 old_role = self.old_role
5463 new_role = self.new_role
5467 if self.op.ndparams:
5468 node.ndparams = self.new_ndparams
5470 if self.op.powered is not None:
5471 node.powered = self.op.powered
5473 for attr in ["master_capable", "vm_capable"]:
5474 val = getattr(self.op, attr)
5476 setattr(node, attr, val)
5477 result.append((attr, str(val)))
5479 if new_role != old_role:
5480 # Tell the node to demote itself, if no longer MC and not offline
5481 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5482 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5484 self.LogWarning("Node failed to demote itself: %s", msg)
5486 new_flags = self._R2F[new_role]
5487 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5489 result.append((desc, str(nf)))
5490 (node.master_candidate, node.drained, node.offline) = new_flags
5492 # we locked all nodes, so we adjust the candidate pool before updating this node
5494 _AdjustCandidatePool(self, [node.name])
5496 if self.op.secondary_ip:
5497 node.secondary_ip = self.op.secondary_ip
5498 result.append(("secondary_ip", self.op.secondary_ip))
5500 # this will trigger configuration file update, if needed
5501 self.cfg.Update(node, feedback_fn)
5503 # this will trigger job queue propagation or cleanup if the mc
5505 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5506 self.context.ReaddNode(node)
5511 class LUNodePowercycle(NoHooksLU):
5512 """Powercycles a node.
5517 def CheckArguments(self):
5518 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5519 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5520 raise errors.OpPrereqError("The node is the master and the force"
5521 " parameter was not set",
5524 def ExpandNames(self):
5525 """Locking for PowercycleNode.
5527 This is a last-resort option and shouldn't block on other
5528 jobs. Therefore, we grab no locks.
5531 self.needed_locks = {}
5533 def Exec(self, feedback_fn):
5537 result = self.rpc.call_node_powercycle(self.op.node_name,
5538 self.cfg.GetHypervisorType())
5539 result.Raise("Failed to schedule the reboot")
5540 return result.payload
5543 class LUClusterQuery(NoHooksLU):
5544 """Query cluster configuration.
5549 def ExpandNames(self):
5550 self.needed_locks = {}
5552 def Exec(self, feedback_fn):
5553 """Return cluster config.
5556 cluster = self.cfg.GetClusterInfo()
5559 # Filter just for enabled hypervisors
5560 for os_name, hv_dict in cluster.os_hvp.items():
5561 os_hvp[os_name] = {}
5562 for hv_name, hv_params in hv_dict.items():
5563 if hv_name in cluster.enabled_hypervisors:
5564 os_hvp[os_name][hv_name] = hv_params
5566 # Convert ip_family to ip_version
5567 primary_ip_version = constants.IP4_VERSION
5568 if cluster.primary_ip_family == netutils.IP6Address.family:
5569 primary_ip_version = constants.IP6_VERSION
5572 "software_version": constants.RELEASE_VERSION,
5573 "protocol_version": constants.PROTOCOL_VERSION,
5574 "config_version": constants.CONFIG_VERSION,
5575 "os_api_version": max(constants.OS_API_VERSIONS),
5576 "export_version": constants.EXPORT_VERSION,
5577 "architecture": (platform.architecture()[0], platform.machine()),
5578 "name": cluster.cluster_name,
5579 "master": cluster.master_node,
5580 "default_hypervisor": cluster.enabled_hypervisors[0],
5581 "enabled_hypervisors": cluster.enabled_hypervisors,
5582 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5583 for hypervisor_name in cluster.enabled_hypervisors]),
5585 "beparams": cluster.beparams,
5586 "osparams": cluster.osparams,
5587 "nicparams": cluster.nicparams,
5588 "ndparams": cluster.ndparams,
5589 "candidate_pool_size": cluster.candidate_pool_size,
5590 "master_netdev": cluster.master_netdev,
5591 "master_netmask": cluster.master_netmask,
5592 "volume_group_name": cluster.volume_group_name,
5593 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5594 "file_storage_dir": cluster.file_storage_dir,
5595 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5596 "maintain_node_health": cluster.maintain_node_health,
5597 "ctime": cluster.ctime,
5598 "mtime": cluster.mtime,
5599 "uuid": cluster.uuid,
5600 "tags": list(cluster.GetTags()),
5601 "uid_pool": cluster.uid_pool,
5602 "default_iallocator": cluster.default_iallocator,
5603 "reserved_lvs": cluster.reserved_lvs,
5604 "primary_ip_version": primary_ip_version,
5605 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5606 "hidden_os": cluster.hidden_os,
5607 "blacklisted_os": cluster.blacklisted_os,
5613 class LUClusterConfigQuery(NoHooksLU):
5614 """Return configuration values.
5618 _FIELDS_DYNAMIC = utils.FieldSet()
5619 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5620 "watcher_pause", "volume_group_name")
5622 def CheckArguments(self):
5623 _CheckOutputFields(static=self._FIELDS_STATIC,
5624 dynamic=self._FIELDS_DYNAMIC,
5625 selected=self.op.output_fields)
5627 def ExpandNames(self):
5628 self.needed_locks = {}
5630 def Exec(self, feedback_fn):
5631 """Dump a representation of the cluster config to the standard output.
5635 for field in self.op.output_fields:
5636 if field == "cluster_name":
5637 entry = self.cfg.GetClusterName()
5638 elif field == "master_node":
5639 entry = self.cfg.GetMasterNode()
5640 elif field == "drain_flag":
5641 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5642 elif field == "watcher_pause":
5643 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5644 elif field == "volume_group_name":
5645 entry = self.cfg.GetVGName()
5647 raise errors.ParameterError(field)
5648 values.append(entry)
5652 class LUInstanceActivateDisks(NoHooksLU):
5653 """Bring up an instance's disks.
5658 def ExpandNames(self):
5659 self._ExpandAndLockInstance()
5660 self.needed_locks[locking.LEVEL_NODE] = []
5661 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5663 def DeclareLocks(self, level):
5664 if level == locking.LEVEL_NODE:
5665 self._LockInstancesNodes()
5667 def CheckPrereq(self):
5668 """Check prerequisites.
5670 This checks that the instance is in the cluster.
5673 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5674 assert self.instance is not None, \
5675 "Cannot retrieve locked instance %s" % self.op.instance_name
5676 _CheckNodeOnline(self, self.instance.primary_node)
5678 def Exec(self, feedback_fn):
5679 """Activate the disks.
5682 disks_ok, disks_info = \
5683 _AssembleInstanceDisks(self, self.instance,
5684 ignore_size=self.op.ignore_size)
5686 raise errors.OpExecError("Cannot activate block devices")
5691 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5693 """Prepare the block devices for an instance.
5695 This sets up the block devices on all nodes.
5697 @type lu: L{LogicalUnit}
5698 @param lu: the logical unit on whose behalf we execute
5699 @type instance: L{objects.Instance}
5700 @param instance: the instance for whose disks we assemble
5701 @type disks: list of L{objects.Disk} or None
5702 @param disks: which disks to assemble (or all, if None)
5703 @type ignore_secondaries: boolean
5704 @param ignore_secondaries: if true, errors on secondary nodes
5705 won't result in an error return from the function
5706 @type ignore_size: boolean
5707 @param ignore_size: if true, the current known size of the disk
5708 will not be used during the disk activation, useful for cases
5709 when the size is wrong
5710 @return: False if the operation failed, otherwise a list of
5711 (host, instance_visible_name, node_visible_name)
5712 with the mapping from node devices to instance devices
5717 iname = instance.name
5718 disks = _ExpandCheckDisks(instance, disks)
5720 # With the two-pass mechanism we try to reduce the window of
5721 # opportunity for the race condition of switching DRBD to primary
5722 # before handshaking occurred, but we do not eliminate it
5724 # The proper fix would be to wait (with some limits) until the
5725 # connection has been made and drbd transitions from WFConnection
5726 # into any other network-connected state (Connected, SyncTarget,
5729 # 1st pass, assemble on all nodes in secondary mode
5730 for idx, inst_disk in enumerate(disks):
5731 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5733 node_disk = node_disk.Copy()
5734 node_disk.UnsetSize()
5735 lu.cfg.SetDiskID(node_disk, node)
5736 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5737 msg = result.fail_msg
5739 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5740 " (is_primary=False, pass=1): %s",
5741 inst_disk.iv_name, node, msg)
5742 if not ignore_secondaries:
5745 # FIXME: race condition on drbd migration to primary
5747 # 2nd pass, do only the primary node
5748 for idx, inst_disk in enumerate(disks):
5751 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5752 if node != instance.primary_node:
5755 node_disk = node_disk.Copy()
5756 node_disk.UnsetSize()
5757 lu.cfg.SetDiskID(node_disk, node)
5758 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5759 msg = result.fail_msg
5761 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5762 " (is_primary=True, pass=2): %s",
5763 inst_disk.iv_name, node, msg)
5766 dev_path = result.payload
5768 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5770 # leave the disks configured for the primary node
5771 # this is a workaround that would be fixed better by
5772 # improving the logical/physical id handling
5774 lu.cfg.SetDiskID(disk, instance.primary_node)
5776 return disks_ok, device_info
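# Illustrative sketch only (hypothetical helper, not part of the original
# module): the usual calling pattern, mirroring LUInstanceActivateDisks
# above -- check disks_ok, then treat device_info as a list of
# (node, iv_name, device_path) tuples.
def _ExampleUseAssembledDisks(lu, instance, feedback_fn):
  """Hypothetical helper demonstrating _AssembleInstanceDisks usage."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    feedback_fn("Disk %s visible on node %s as %s" % (iv_name, node, dev_path))
  return device_info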
5779 def _StartInstanceDisks(lu, instance, force):
5780 """Start the disks of an instance.
5783 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5784 ignore_secondaries=force)
5786 _ShutdownInstanceDisks(lu, instance)
5787 if force is not None and not force:
5788 lu.proc.LogWarning("", hint="If the message above refers to a"
5790 " you can retry the operation using '--force'.")
5791 raise errors.OpExecError("Disk consistency error")
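# Illustrative call (as used by LUInstanceStartup below): a true force value
# makes failures on secondary nodes non-fatal, while force=None merely
# suppresses the "--force" retry hint printed on a disk consistency error.
#   _StartInstanceDisks(self, instance, self.op.force)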
5794 class LUInstanceDeactivateDisks(NoHooksLU):
5795 """Shutdown an instance's disks.
5800 def ExpandNames(self):
5801 self._ExpandAndLockInstance()
5802 self.needed_locks[locking.LEVEL_NODE] = []
5803 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5805 def DeclareLocks(self, level):
5806 if level == locking.LEVEL_NODE:
5807 self._LockInstancesNodes()
5809 def CheckPrereq(self):
5810 """Check prerequisites.
5812 This checks that the instance is in the cluster.
5815 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5816 assert self.instance is not None, \
5817 "Cannot retrieve locked instance %s" % self.op.instance_name
5819 def Exec(self, feedback_fn):
5820 """Deactivate the disks
5823 instance = self.instance
5825 _ShutdownInstanceDisks(self, instance)
5827 _SafeShutdownInstanceDisks(self, instance)
5830 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5831 """Shutdown block devices of an instance.
5833 This function checks if an instance is running before calling
5834 _ShutdownInstanceDisks.
5837 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5838 _ShutdownInstanceDisks(lu, instance, disks=disks)
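# Illustrative sketch (hypothetical helper, not in the original module):
# LUInstanceDeactivateDisks.Exec above picks between the forced and the safe
# variant; the safe one first checks that the instance is down.
def _ExampleDeactivateDisks(lu, instance, force=False):
  """Hypothetical helper mirroring LUInstanceDeactivateDisks.Exec."""
  if force:
    _ShutdownInstanceDisks(lu, instance)
  else:
    _SafeShutdownInstanceDisks(lu, instance)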
5841 def _ExpandCheckDisks(instance, disks):
5842 """Return the instance disks selected by the disks list
5844 @type disks: list of L{objects.Disk} or None
5845 @param disks: selected disks
5846 @rtype: list of L{objects.Disk}
5847 @return: selected instance disks to act on
5851 return instance.disks
5853 if not set(disks).issubset(instance.disks):
5854 raise errors.ProgrammerError("Can only act on disks belonging to the"
5859 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5860 """Shutdown block devices of an instance.
5862 This does the shutdown on all nodes of the instance.
5864 If ignore_primary is false, errors on the primary node are
5869 disks = _ExpandCheckDisks(instance, disks)
5872 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5873 lu.cfg.SetDiskID(top_disk, node)
5874 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5875 msg = result.fail_msg
5877 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5878 disk.iv_name, node, msg)
5879 if ((node == instance.primary_node and not ignore_primary) or
5880 (node != instance.primary_node and not result.offline)):
5885 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5886 """Checks if a node has enough free memory.
5888 This function checks if a given node has the needed amount of free
5889 memory. In case the node has less memory or we cannot get the
5890 information from the node, this function raises an OpPrereqError
5893 @type lu: C{LogicalUnit}
5894 @param lu: a logical unit from which we get configuration data
5896 @param node: the node to check
5897 @type reason: C{str}
5898 @param reason: string to use in the error message
5899 @type requested: C{int}
5900 @param requested: the amount of memory in MiB to check for
5901 @type hypervisor_name: C{str}
5902 @param hypervisor_name: the hypervisor to ask for memory stats
5903 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5904 we cannot check the node
5907 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5908 nodeinfo[node].Raise("Can't get data from node %s" % node,
5909 prereq=True, ecode=errors.ECODE_ENVIRON)
5910 free_mem = nodeinfo[node].payload.get("memory_free", None)
5911 if not isinstance(free_mem, int):
5912 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5913 " was '%s'" % (node, free_mem),
5914 errors.ECODE_ENVIRON)
5915 if requested > free_mem:
5916 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5917 " needed %s MiB, available %s MiB" %
5918 (node, reason, requested, free_mem),
5922 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5923 """Checks if nodes have enough free disk space in the all VGs.
5925 This function check if all given nodes have the needed amount of
5926 free disk. In case any node has less disk or we cannot get the
5927 information from the node, this function raise an OpPrereqError
5930 @type lu: C{LogicalUnit}
5931 @param lu: a logical unit from which we get configuration data
5932 @type nodenames: C{list}
5933 @param nodenames: the list of node names to check
5934 @type req_sizes: C{dict}
5935 @param req_sizes: the hash of vg and corresponding amount of disk in
5937 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5938 or we cannot check the node
5941 for vg, req_size in req_sizes.items():
5942 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
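# Illustrative example (node and VG names are made up): req_sizes maps each
# volume group to the free space, in MiB, that must be available on every
# listed node.
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 2 * 10240, "datavg": 512})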
5945 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5946 """Checks if nodes have enough free disk space in the specified VG.
5948 This function checks if all given nodes have the needed amount of
5949 free disk. In case any node has less disk or we cannot get the
5950 information from the node, this function raises an OpPrereqError
5953 @type lu: C{LogicalUnit}
5954 @param lu: a logical unit from which we get configuration data
5955 @type nodenames: C{list}
5956 @param nodenames: the list of node names to check
5958 @param vg: the volume group to check
5959 @type requested: C{int}
5960 @param requested: the amount of disk in MiB to check for
5961 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5962 or we cannot check the node
5965 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5966 for node in nodenames:
5967 info = nodeinfo[node]
5968 info.Raise("Cannot get current information from node %s" % node,
5969 prereq=True, ecode=errors.ECODE_ENVIRON)
5970 vg_free = info.payload.get("vg_free", None)
5971 if not isinstance(vg_free, int):
5972 raise errors.OpPrereqError("Can't compute free disk space on node"
5973 " %s for vg %s, result was '%s'" %
5974 (node, vg, vg_free), errors.ECODE_ENVIRON)
5975 if requested > vg_free:
5976 raise errors.OpPrereqError("Not enough disk space on target node %s"
5977 " vg %s: required %d MiB, available %d MiB" %
5978 (node, vg, requested, vg_free),
5982 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5983 """Checks if nodes have enough physical CPUs
5985 This function checks if all given nodes have the needed number of
5986 physical CPUs. In case any node has less CPUs or we cannot get the
5987 information from the node, this function raises an OpPrereqError
5990 @type lu: C{LogicalUnit}
5991 @param lu: a logical unit from which we get configuration data
5992 @type nodenames: C{list}
5993 @param nodenames: the list of node names to check
5994 @type requested: C{int}
5995 @param requested: the minimum acceptable number of physical CPUs
5996 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5997 or we cannot check the node
6000 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6001 for node in nodenames:
6002 info = nodeinfo[node]
6003 info.Raise("Cannot get current information from node %s" % node,
6004 prereq=True, ecode=errors.ECODE_ENVIRON)
6005 num_cpus = info.payload.get("cpu_total", None)
6006 if not isinstance(num_cpus, int):
6007 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6008 " on node %s, result was '%s'" %
6009 (node, num_cpus), errors.ECODE_ENVIRON)
6010 if requested > num_cpus:
6011 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6012 "required" % (node, num_cpus, requested),
6016 class LUInstanceStartup(LogicalUnit):
6017 """Starts an instance.
6020 HPATH = "instance-start"
6021 HTYPE = constants.HTYPE_INSTANCE
6024 def CheckArguments(self):
6026 if self.op.beparams:
6027 # fill the beparams dict
6028 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6030 def ExpandNames(self):
6031 self._ExpandAndLockInstance()
6033 def BuildHooksEnv(self):
6036 This runs on master, primary and secondary nodes of the instance.
6040 "FORCE": self.op.force,
6043 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6047 def BuildHooksNodes(self):
6048 """Build hooks nodes.
6051 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6054 def CheckPrereq(self):
6055 """Check prerequisites.
6057 This checks that the instance is in the cluster.
6060 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6061 assert self.instance is not None, \
6062 "Cannot retrieve locked instance %s" % self.op.instance_name
6065 if self.op.hvparams:
6066 # check hypervisor parameter syntax (locally)
6067 cluster = self.cfg.GetClusterInfo()
6068 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6069 filled_hvp = cluster.FillHV(instance)
6070 filled_hvp.update(self.op.hvparams)
6071 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6072 hv_type.CheckParameterSyntax(filled_hvp)
6073 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6075 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6077 if self.primary_offline and self.op.ignore_offline_nodes:
6078 self.proc.LogWarning("Ignoring offline primary node")
6080 if self.op.hvparams or self.op.beparams:
6081 self.proc.LogWarning("Overridden parameters are ignored")
6083 _CheckNodeOnline(self, instance.primary_node)
6085 bep = self.cfg.GetClusterInfo().FillBE(instance)
6087 # check bridges existence
6088 _CheckInstanceBridgesExist(self, instance)
6090 remote_info = self.rpc.call_instance_info(instance.primary_node,
6092 instance.hypervisor)
6093 remote_info.Raise("Error checking node %s" % instance.primary_node,
6094 prereq=True, ecode=errors.ECODE_ENVIRON)
6095 if not remote_info.payload: # not running already
6096 _CheckNodeFreeMemory(self, instance.primary_node,
6097 "starting instance %s" % instance.name,
6098 bep[constants.BE_MEMORY], instance.hypervisor)
6100 def Exec(self, feedback_fn):
6101 """Start the instance.
6104 instance = self.instance
6105 force = self.op.force
6107 if not self.op.no_remember:
6108 self.cfg.MarkInstanceUp(instance.name)
6110 if self.primary_offline:
6111 assert self.op.ignore_offline_nodes
6112 self.proc.LogInfo("Primary node offline, marked instance as started")
6114 node_current = instance.primary_node
6116 _StartInstanceDisks(self, instance, force)
6119 self.rpc.call_instance_start(node_current,
6120 (instance, self.op.hvparams,
6122 self.op.startup_paused)
6123 msg = result.fail_msg
6125 _ShutdownInstanceDisks(self, instance)
6126 raise errors.OpExecError("Could not start instance: %s" % msg)
6129 class LUInstanceReboot(LogicalUnit):
6130 """Reboot an instance.
6133 HPATH = "instance-reboot"
6134 HTYPE = constants.HTYPE_INSTANCE
6137 def ExpandNames(self):
6138 self._ExpandAndLockInstance()
6140 def BuildHooksEnv(self):
6143 This runs on master, primary and secondary nodes of the instance.
6147 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6148 "REBOOT_TYPE": self.op.reboot_type,
6149 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6152 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6156 def BuildHooksNodes(self):
6157 """Build hooks nodes.
6160 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6163 def CheckPrereq(self):
6164 """Check prerequisites.
6166 This checks that the instance is in the cluster.
6169 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6170 assert self.instance is not None, \
6171 "Cannot retrieve locked instance %s" % self.op.instance_name
6173 _CheckNodeOnline(self, instance.primary_node)
6175 # check bridges existence
6176 _CheckInstanceBridgesExist(self, instance)
6178 def Exec(self, feedback_fn):
6179 """Reboot the instance.
6182 instance = self.instance
6183 ignore_secondaries = self.op.ignore_secondaries
6184 reboot_type = self.op.reboot_type
6186 remote_info = self.rpc.call_instance_info(instance.primary_node,
6188 instance.hypervisor)
6189 remote_info.Raise("Error checking node %s" % instance.primary_node)
6190 instance_running = bool(remote_info.payload)
6192 node_current = instance.primary_node
6194 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6195 constants.INSTANCE_REBOOT_HARD]:
6196 for disk in instance.disks:
6197 self.cfg.SetDiskID(disk, node_current)
6198 result = self.rpc.call_instance_reboot(node_current, instance,
6200 self.op.shutdown_timeout)
6201 result.Raise("Could not reboot instance")
6203 if instance_running:
6204 result = self.rpc.call_instance_shutdown(node_current, instance,
6205 self.op.shutdown_timeout)
6206 result.Raise("Could not shutdown instance for full reboot")
6207 _ShutdownInstanceDisks(self, instance)
6209 self.LogInfo("Instance %s was already stopped, starting now",
6211 _StartInstanceDisks(self, instance, ignore_secondaries)
6212 result = self.rpc.call_instance_start(node_current,
6213 (instance, None, None), False)
6214 msg = result.fail_msg
6216 _ShutdownInstanceDisks(self, instance)
6217 raise errors.OpExecError("Could not start instance for"
6218 " full reboot: %s" % msg)
6220 self.cfg.MarkInstanceUp(instance.name)
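  # Illustrative note (opcode parameters are example values only): soft and
  # hard reboots are delegated to the hypervisor via call_instance_reboot;
  # any other reboot type takes the full stop/start path above, e.g.
  #   opcodes.OpInstanceReboot(instance_name="inst1.example.com",
  #                            reboot_type=constants.INSTANCE_REBOOT_FULL)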
6223 class LUInstanceShutdown(LogicalUnit):
6224 """Shutdown an instance.
6227 HPATH = "instance-stop"
6228 HTYPE = constants.HTYPE_INSTANCE
6231 def ExpandNames(self):
6232 self._ExpandAndLockInstance()
6234 def BuildHooksEnv(self):
6237 This runs on master, primary and secondary nodes of the instance.
6240 env = _BuildInstanceHookEnvByObject(self, self.instance)
6241 env["TIMEOUT"] = self.op.timeout
6244 def BuildHooksNodes(self):
6245 """Build hooks nodes.
6248 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6251 def CheckPrereq(self):
6252 """Check prerequisites.
6254 This checks that the instance is in the cluster.
6257 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6258 assert self.instance is not None, \
6259 "Cannot retrieve locked instance %s" % self.op.instance_name
6261 self.primary_offline = \
6262 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6264 if self.primary_offline and self.op.ignore_offline_nodes:
6265 self.proc.LogWarning("Ignoring offline primary node")
6267 _CheckNodeOnline(self, self.instance.primary_node)
6269 def Exec(self, feedback_fn):
6270 """Shutdown the instance.
6273 instance = self.instance
6274 node_current = instance.primary_node
6275 timeout = self.op.timeout
6277 if not self.op.no_remember:
6278 self.cfg.MarkInstanceDown(instance.name)
6280 if self.primary_offline:
6281 assert self.op.ignore_offline_nodes
6282 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6284 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6285 msg = result.fail_msg
6287 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6289 _ShutdownInstanceDisks(self, instance)
6292 class LUInstanceReinstall(LogicalUnit):
6293 """Reinstall an instance.
6296 HPATH = "instance-reinstall"
6297 HTYPE = constants.HTYPE_INSTANCE
6300 def ExpandNames(self):
6301 self._ExpandAndLockInstance()
6303 def BuildHooksEnv(self):
6306 This runs on master, primary and secondary nodes of the instance.
6309 return _BuildInstanceHookEnvByObject(self, self.instance)
6311 def BuildHooksNodes(self):
6312 """Build hooks nodes.
6315 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6318 def CheckPrereq(self):
6319 """Check prerequisites.
6321 This checks that the instance is in the cluster and is not running.
6324 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6325 assert instance is not None, \
6326 "Cannot retrieve locked instance %s" % self.op.instance_name
6327 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6328 " offline, cannot reinstall")
6329 for node in instance.secondary_nodes:
6330 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6331 " cannot reinstall")
6333 if instance.disk_template == constants.DT_DISKLESS:
6334 raise errors.OpPrereqError("Instance '%s' has no disks" %
6335 self.op.instance_name,
6337 _CheckInstanceDown(self, instance, "cannot reinstall")
6339 if self.op.os_type is not None:
6341 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6342 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6343 instance_os = self.op.os_type
6345 instance_os = instance.os
6347 nodelist = list(instance.all_nodes)
6349 if self.op.osparams:
6350 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6351 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6352 self.os_inst = i_osdict # the new dict (without defaults)
6356 self.instance = instance
6358 def Exec(self, feedback_fn):
6359 """Reinstall the instance.
6362 inst = self.instance
6364 if self.op.os_type is not None:
6365 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6366 inst.os = self.op.os_type
6367 # Write to configuration
6368 self.cfg.Update(inst, feedback_fn)
6370 _StartInstanceDisks(self, inst, None)
6372 feedback_fn("Running the instance OS create scripts...")
6373 # FIXME: pass debug option from opcode to backend
6374 result = self.rpc.call_instance_os_add(inst.primary_node,
6375 (inst, self.os_inst), True,
6376 self.op.debug_level)
6377 result.Raise("Could not install OS for instance %s on node %s" %
6378 (inst.name, inst.primary_node))
6380 _ShutdownInstanceDisks(self, inst)
6383 class LUInstanceRecreateDisks(LogicalUnit):
6384 """Recreate an instance's missing disks.
6387 HPATH = "instance-recreate-disks"
6388 HTYPE = constants.HTYPE_INSTANCE
6391 def CheckArguments(self):
6392 # normalise the disk list
6393 self.op.disks = sorted(frozenset(self.op.disks))
6395 def ExpandNames(self):
6396 self._ExpandAndLockInstance()
6397 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6399 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6400 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6402 self.needed_locks[locking.LEVEL_NODE] = []
6404 def DeclareLocks(self, level):
6405 if level == locking.LEVEL_NODE:
6406 # if we replace the nodes, we only need to lock the old primary,
6407 # otherwise we need to lock all nodes for disk re-creation
6408 primary_only = bool(self.op.nodes)
6409 self._LockInstancesNodes(primary_only=primary_only)
6411 def BuildHooksEnv(self):
6414 This runs on master, primary and secondary nodes of the instance.
6417 return _BuildInstanceHookEnvByObject(self, self.instance)
6419 def BuildHooksNodes(self):
6420 """Build hooks nodes.
6423 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6426 def CheckPrereq(self):
6427 """Check prerequisites.
6429 This checks that the instance is in the cluster and is not running.
6432 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6433 assert instance is not None, \
6434 "Cannot retrieve locked instance %s" % self.op.instance_name
6436 if len(self.op.nodes) != len(instance.all_nodes):
6437 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6438 " %d replacement nodes were specified" %
6439 (instance.name, len(instance.all_nodes),
6440 len(self.op.nodes)),
6442 assert instance.disk_template != constants.DT_DRBD8 or \
6443 len(self.op.nodes) == 2
6444 assert instance.disk_template != constants.DT_PLAIN or \
6445 len(self.op.nodes) == 1
6446 primary_node = self.op.nodes[0]
6448 primary_node = instance.primary_node
6449 _CheckNodeOnline(self, primary_node)
6451 if instance.disk_template == constants.DT_DISKLESS:
6452 raise errors.OpPrereqError("Instance '%s' has no disks" %
6453 self.op.instance_name, errors.ECODE_INVAL)
6454 # if we replace nodes *and* the old primary is offline, we don't
6456 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6457 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6458 if not (self.op.nodes and old_pnode.offline):
6459 _CheckInstanceDown(self, instance, "cannot recreate disks")
6461 if not self.op.disks:
6462 self.op.disks = range(len(instance.disks))
6464 for idx in self.op.disks:
6465 if idx >= len(instance.disks):
6466 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6468 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6469 raise errors.OpPrereqError("Can't recreate disks partially and"
6470 " change the nodes at the same time",
6472 self.instance = instance
6474 def Exec(self, feedback_fn):
6475 """Recreate the disks.
6478 instance = self.instance
6481 mods = [] # keeps track of needed logical_id changes
6483 for idx, disk in enumerate(instance.disks):
6484 if idx not in self.op.disks: # disk idx has not been passed in
6487 # update secondaries for disks, if needed
6489 if disk.dev_type == constants.LD_DRBD8:
6490 # need to update the nodes and minors
6491 assert len(self.op.nodes) == 2
6492 assert len(disk.logical_id) == 6 # otherwise disk internals
6494 (_, _, old_port, _, _, old_secret) = disk.logical_id
6495 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6496 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6497 new_minors[0], new_minors[1], old_secret)
6498 assert len(disk.logical_id) == len(new_id)
6499 mods.append((idx, new_id))
6501 # now that we have passed all asserts above, we can apply the mods
6502 # in a single run (to avoid partial changes)
6503 for idx, new_id in mods:
6504 instance.disks[idx].logical_id = new_id
6506 # change primary node, if needed
6508 instance.primary_node = self.op.nodes[0]
6509 self.LogWarning("Changing the instance's nodes, you will have to"
6510 " remove any disks left on the older nodes manually")
6513 self.cfg.Update(instance, feedback_fn)
6515 _CreateDisks(self, instance, to_skip=to_skip)
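# Illustrative note (field names follow the tuple unpacked above): for DRBD8
# disks the logical_id rewritten here is the 6-tuple
#   (node_a, node_b, port, minor_a, minor_b, secret)
# and recreating on new nodes keeps port and secret while allocating fresh
# minors through AllocateDRBDMinor.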
6518 class LUInstanceRename(LogicalUnit):
6519 """Rename an instance.
6522 HPATH = "instance-rename"
6523 HTYPE = constants.HTYPE_INSTANCE
6525 def CheckArguments(self):
6529 if self.op.ip_check and not self.op.name_check:
6530 # TODO: make the ip check more flexible and not depend on the name check
6531 raise errors.OpPrereqError("IP address check requires a name check",
6534 def BuildHooksEnv(self):
6537 This runs on master, primary and secondary nodes of the instance.
6540 env = _BuildInstanceHookEnvByObject(self, self.instance)
6541 env["INSTANCE_NEW_NAME"] = self.op.new_name
6544 def BuildHooksNodes(self):
6545 """Build hooks nodes.
6548 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6551 def CheckPrereq(self):
6552 """Check prerequisites.
6554 This checks that the instance is in the cluster and is not running.
6557 self.op.instance_name = _ExpandInstanceName(self.cfg,
6558 self.op.instance_name)
6559 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6560 assert instance is not None
6561 _CheckNodeOnline(self, instance.primary_node)
6562 _CheckInstanceDown(self, instance, "cannot rename")
6563 self.instance = instance
6565 new_name = self.op.new_name
6566 if self.op.name_check:
6567 hostname = netutils.GetHostname(name=new_name)
6568 if hostname != new_name:
6569 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6571 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6572 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6573 " same as given hostname '%s'") %
6574 (hostname.name, self.op.new_name),
6576 new_name = self.op.new_name = hostname.name
6577 if (self.op.ip_check and
6578 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6579 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6580 (hostname.ip, new_name),
6581 errors.ECODE_NOTUNIQUE)
6583 instance_list = self.cfg.GetInstanceList()
6584 if new_name in instance_list and new_name != instance.name:
6585 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6586 new_name, errors.ECODE_EXISTS)
6588 def Exec(self, feedback_fn):
6589 """Rename the instance.
6592 inst = self.instance
6593 old_name = inst.name
6595 rename_file_storage = False
6596 if (inst.disk_template in constants.DTS_FILEBASED and
6597 self.op.new_name != inst.name):
6598 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6599 rename_file_storage = True
6601 self.cfg.RenameInstance(inst.name, self.op.new_name)
6602 # Change the instance lock. This is definitely safe while we hold the BGL.
6603 # Otherwise the new lock would have to be added in acquired mode.
6605 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6606 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6608 # re-read the instance from the configuration after rename
6609 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6611 if rename_file_storage:
6612 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6613 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6614 old_file_storage_dir,
6615 new_file_storage_dir)
6616 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6617 " (but the instance has been renamed in Ganeti)" %
6618 (inst.primary_node, old_file_storage_dir,
6619 new_file_storage_dir))
6621 _StartInstanceDisks(self, inst, None)
6623 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6624 old_name, self.op.debug_level)
6625 msg = result.fail_msg
6627 msg = ("Could not run OS rename script for instance %s on node %s"
6628 " (but the instance has been renamed in Ganeti): %s" %
6629 (inst.name, inst.primary_node, msg))
6630 self.proc.LogWarning(msg)
6632 _ShutdownInstanceDisks(self, inst)
6637 class LUInstanceRemove(LogicalUnit):
6638 """Remove an instance.
6641 HPATH = "instance-remove"
6642 HTYPE = constants.HTYPE_INSTANCE
6645 def ExpandNames(self):
6646 self._ExpandAndLockInstance()
6647 self.needed_locks[locking.LEVEL_NODE] = []
6648 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6650 def DeclareLocks(self, level):
6651 if level == locking.LEVEL_NODE:
6652 self._LockInstancesNodes()
6654 def BuildHooksEnv(self):
6657 This runs on master, primary and secondary nodes of the instance.
6660 env = _BuildInstanceHookEnvByObject(self, self.instance)
6661 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6664 def BuildHooksNodes(self):
6665 """Build hooks nodes.
6668 nl = [self.cfg.GetMasterNode()]
6669 nl_post = list(self.instance.all_nodes) + nl
6670 return (nl, nl_post)
6672 def CheckPrereq(self):
6673 """Check prerequisites.
6675 This checks that the instance is in the cluster.
6678 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6679 assert self.instance is not None, \
6680 "Cannot retrieve locked instance %s" % self.op.instance_name
6682 def Exec(self, feedback_fn):
6683 """Remove the instance.
6686 instance = self.instance
6687 logging.info("Shutting down instance %s on node %s",
6688 instance.name, instance.primary_node)
6690 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6691 self.op.shutdown_timeout)
6692 msg = result.fail_msg
6694 if self.op.ignore_failures:
6695 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6697 raise errors.OpExecError("Could not shutdown instance %s on"
6699 (instance.name, instance.primary_node, msg))
6701 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6704 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6705 """Utility function to remove an instance.
6708 logging.info("Removing block devices for instance %s", instance.name)
6710 if not _RemoveDisks(lu, instance):
6711 if not ignore_failures:
6712 raise errors.OpExecError("Can't remove instance's disks")
6713 feedback_fn("Warning: can't remove instance's disks")
6715 logging.info("Removing instance %s out of cluster config", instance.name)
6717 lu.cfg.RemoveInstance(instance.name)
6719 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6720 "Instance lock removal conflict"
6722 # Remove lock for the instance
6723 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6726 class LUInstanceQuery(NoHooksLU):
6727 """Logical unit for querying instances.
6730 # pylint: disable=W0142
6733 def CheckArguments(self):
6734 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6735 self.op.output_fields, self.op.use_locking)
6737 def ExpandNames(self):
6738 self.iq.ExpandNames(self)
6740 def DeclareLocks(self, level):
6741 self.iq.DeclareLocks(self, level)
6743 def Exec(self, feedback_fn):
6744 return self.iq.OldStyleQuery(self)
6747 class LUInstanceFailover(LogicalUnit):
6748 """Failover an instance.
6751 HPATH = "instance-failover"
6752 HTYPE = constants.HTYPE_INSTANCE
6755 def CheckArguments(self):
6756 """Check the arguments.
6759 self.iallocator = getattr(self.op, "iallocator", None)
6760 self.target_node = getattr(self.op, "target_node", None)
6762 def ExpandNames(self):
6763 self._ExpandAndLockInstance()
6765 if self.op.target_node is not None:
6766 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6768 self.needed_locks[locking.LEVEL_NODE] = []
6769 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6771 ignore_consistency = self.op.ignore_consistency
6772 shutdown_timeout = self.op.shutdown_timeout
6773 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6776 ignore_consistency=ignore_consistency,
6777 shutdown_timeout=shutdown_timeout)
6778 self.tasklets = [self._migrater]
6780 def DeclareLocks(self, level):
6781 if level == locking.LEVEL_NODE:
6782 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6783 if instance.disk_template in constants.DTS_EXT_MIRROR:
6784 if self.op.target_node is None:
6785 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6787 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6788 self.op.target_node]
6789 del self.recalculate_locks[locking.LEVEL_NODE]
6791 self._LockInstancesNodes()
6793 def BuildHooksEnv(self):
6796 This runs on master, primary and secondary nodes of the instance.
6799 instance = self._migrater.instance
6800 source_node = instance.primary_node
6801 target_node = self.op.target_node
6803 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6804 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6805 "OLD_PRIMARY": source_node,
6806 "NEW_PRIMARY": target_node,
6809 if instance.disk_template in constants.DTS_INT_MIRROR:
6810 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6811 env["NEW_SECONDARY"] = source_node
6813 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6815 env.update(_BuildInstanceHookEnvByObject(self, instance))
6819 def BuildHooksNodes(self):
6820 """Build hooks nodes.
6823 instance = self._migrater.instance
6824 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6825 return (nl, nl + [instance.primary_node])
6828 class LUInstanceMigrate(LogicalUnit):
6829 """Migrate an instance.
6831 This is migration without shutting down the instance, as opposed to
6832 failover, which requires the instance to be shut down first.
6835 HPATH = "instance-migrate"
6836 HTYPE = constants.HTYPE_INSTANCE
6839 def ExpandNames(self):
6840 self._ExpandAndLockInstance()
6842 if self.op.target_node is not None:
6843 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6845 self.needed_locks[locking.LEVEL_NODE] = []
6846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6848 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6849 cleanup=self.op.cleanup,
6851 fallback=self.op.allow_failover)
6852 self.tasklets = [self._migrater]
6854 def DeclareLocks(self, level):
6855 if level == locking.LEVEL_NODE:
6856 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6857 if instance.disk_template in constants.DTS_EXT_MIRROR:
6858 if self.op.target_node is None:
6859 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6861 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6862 self.op.target_node]
6863 del self.recalculate_locks[locking.LEVEL_NODE]
6865 self._LockInstancesNodes()
6867 def BuildHooksEnv(self):
6870 This runs on master, primary and secondary nodes of the instance.
6873 instance = self._migrater.instance
6874 source_node = instance.primary_node
6875 target_node = self.op.target_node
6876 env = _BuildInstanceHookEnvByObject(self, instance)
6878 "MIGRATE_LIVE": self._migrater.live,
6879 "MIGRATE_CLEANUP": self.op.cleanup,
6880 "OLD_PRIMARY": source_node,
6881 "NEW_PRIMARY": target_node,
6884 if instance.disk_template in constants.DTS_INT_MIRROR:
6885 env["OLD_SECONDARY"] = target_node
6886 env["NEW_SECONDARY"] = source_node
6888 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6892 def BuildHooksNodes(self):
6893 """Build hooks nodes.
6896 instance = self._migrater.instance
6897 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6898 return (nl, nl + [instance.primary_node])
6901 class LUInstanceMove(LogicalUnit):
6902 """Move an instance by data-copying.
6905 HPATH = "instance-move"
6906 HTYPE = constants.HTYPE_INSTANCE
6909 def ExpandNames(self):
6910 self._ExpandAndLockInstance()
6911 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6912 self.op.target_node = target_node
6913 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6914 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6916 def DeclareLocks(self, level):
6917 if level == locking.LEVEL_NODE:
6918 self._LockInstancesNodes(primary_only=True)
6920 def BuildHooksEnv(self):
6923 This runs on master, primary and secondary nodes of the instance.
6927 "TARGET_NODE": self.op.target_node,
6928 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6930 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6933 def BuildHooksNodes(self):
6934 """Build hooks nodes.
6938 self.cfg.GetMasterNode(),
6939 self.instance.primary_node,
6940 self.op.target_node,
6944 def CheckPrereq(self):
6945 """Check prerequisites.
6947 This checks that the instance is in the cluster.
6950 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6951 assert self.instance is not None, \
6952 "Cannot retrieve locked instance %s" % self.op.instance_name
6954 node = self.cfg.GetNodeInfo(self.op.target_node)
6955 assert node is not None, \
6956 "Cannot retrieve locked node %s" % self.op.target_node
6958 self.target_node = target_node = node.name
6960 if target_node == instance.primary_node:
6961 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6962 (instance.name, target_node),
6965 bep = self.cfg.GetClusterInfo().FillBE(instance)
6967 for idx, dsk in enumerate(instance.disks):
6968 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6969 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6970 " cannot copy" % idx, errors.ECODE_STATE)
6972 _CheckNodeOnline(self, target_node)
6973 _CheckNodeNotDrained(self, target_node)
6974 _CheckNodeVmCapable(self, target_node)
6976 if instance.admin_up:
6977 # check memory requirements on the secondary node
6978 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6979 instance.name, bep[constants.BE_MEMORY],
6980 instance.hypervisor)
6982 self.LogInfo("Not checking memory on the secondary node as"
6983 " instance will not be started")
6985 # check bridge existence
6986 _CheckInstanceBridgesExist(self, instance, node=target_node)
6988 def Exec(self, feedback_fn):
6989 """Move an instance.
6991 The move is done by shutting it down on its present node, copying
6992 the data over (slow) and starting it on the new node.
6995 instance = self.instance
6997 source_node = instance.primary_node
6998 target_node = self.target_node
7000 self.LogInfo("Shutting down instance %s on source node %s",
7001 instance.name, source_node)
7003 result = self.rpc.call_instance_shutdown(source_node, instance,
7004 self.op.shutdown_timeout)
7005 msg = result.fail_msg
7007 if self.op.ignore_consistency:
7008 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7009 " Proceeding anyway. Please make sure node"
7010 " %s is down. Error details: %s",
7011 instance.name, source_node, source_node, msg)
7013 raise errors.OpExecError("Could not shutdown instance %s on"
7015 (instance.name, source_node, msg))
7017 # create the target disks
7019 _CreateDisks(self, instance, target_node=target_node)
7020 except errors.OpExecError:
7021 self.LogWarning("Device creation failed, reverting...")
7023 _RemoveDisks(self, instance, target_node=target_node)
7025 self.cfg.ReleaseDRBDMinors(instance.name)
7028 cluster_name = self.cfg.GetClusterInfo().cluster_name
7031 # activate, get path, copy the data over
7032 for idx, disk in enumerate(instance.disks):
7033 self.LogInfo("Copying data for disk %d", idx)
7034 result = self.rpc.call_blockdev_assemble(target_node, disk,
7035 instance.name, True, idx)
7037 self.LogWarning("Can't assemble newly created disk %d: %s",
7038 idx, result.fail_msg)
7039 errs.append(result.fail_msg)
7041 dev_path = result.payload
7042 result = self.rpc.call_blockdev_export(source_node, disk,
7043 target_node, dev_path,
7046 self.LogWarning("Can't copy data over for disk %d: %s",
7047 idx, result.fail_msg)
7048 errs.append(result.fail_msg)
7052 self.LogWarning("Some disks failed to copy, aborting")
7054 _RemoveDisks(self, instance, target_node=target_node)
7056 self.cfg.ReleaseDRBDMinors(instance.name)
7057 raise errors.OpExecError("Errors during disk copy: %s" %
7060 instance.primary_node = target_node
7061 self.cfg.Update(instance, feedback_fn)
7063 self.LogInfo("Removing the disks on the original node")
7064 _RemoveDisks(self, instance, target_node=source_node)
7066 # Only start the instance if it's marked as up
7067 if instance.admin_up:
7068 self.LogInfo("Starting instance %s on node %s",
7069 instance.name, target_node)
7071 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7072 ignore_secondaries=True)
7074 _ShutdownInstanceDisks(self, instance)
7075 raise errors.OpExecError("Can't activate the instance's disks")
7077 result = self.rpc.call_instance_start(target_node,
7078 (instance, None, None), False)
7079 msg = result.fail_msg
7081 _ShutdownInstanceDisks(self, instance)
7082 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7083 (instance.name, target_node, msg))
7086 class LUNodeMigrate(LogicalUnit):
7087 """Migrate all instances from a node.
7090 HPATH = "node-migrate"
7091 HTYPE = constants.HTYPE_NODE
7094 def CheckArguments(self):
7097 def ExpandNames(self):
7098 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7100 self.share_locks = _ShareAll()
7101 self.needed_locks = {
7102 locking.LEVEL_NODE: [self.op.node_name],
7105 def BuildHooksEnv(self):
7108 This runs on the master, the primary and all the secondaries.
7112 "NODE_NAME": self.op.node_name,
7115 def BuildHooksNodes(self):
7116 """Build hooks nodes.
7119 nl = [self.cfg.GetMasterNode()]
7122 def CheckPrereq(self):
7125 def Exec(self, feedback_fn):
7126 # Prepare jobs for migration instances
7128 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7131 iallocator=self.op.iallocator,
7132 target_node=self.op.target_node)]
7133 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7136 # TODO: Run iallocator in this opcode and pass correct placement options to
7137 # OpInstanceMigrate. Since other jobs can modify the cluster between
7138 # running the iallocator and the actual migration, a good consistency model
7139 # will have to be found.
7141 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7142 frozenset([self.op.node_name]))
7144 return ResultWithJobs(jobs)
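  # Illustrative sketch (instance names are made up): jobs is a list of
  # lists of opcodes, one inner list per job to submit, mirroring the list
  # comprehension above.
  #   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
  #           [opcodes.OpInstanceMigrate(instance_name="inst2.example.com")]]
  #   return ResultWithJobs(jobs)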
7147 class TLMigrateInstance(Tasklet):
7148 """Tasklet class for instance migration.
7151 @ivar live: whether the migration will be done live or non-live;
7152 this variable is initialized only after CheckPrereq has run
7153 @type cleanup: boolean
7154 @ivar cleanup: Whether we clean up from a failed migration
7155 @type iallocator: string
7156 @ivar iallocator: The iallocator used to determine target_node
7157 @type target_node: string
7158 @ivar target_node: If given, the target_node to reallocate the instance to
7159 @type failover: boolean
7160 @ivar failover: Whether operation results in failover or migration
7161 @type fallback: boolean
7162 @ivar fallback: Whether fallback to failover is allowed if migration not
7164 @type ignore_consistency: boolean
7165 @ivar ignore_consistency: Whether we should ignore consistency between source
7167 @type shutdown_timeout: int
7168 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
7173 _MIGRATION_POLL_INTERVAL = 1 # seconds
7174 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7176 def __init__(self, lu, instance_name, cleanup=False,
7177 failover=False, fallback=False,
7178 ignore_consistency=False,
7179 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7180 """Initializes this class.
7183 Tasklet.__init__(self, lu)
7186 self.instance_name = instance_name
7187 self.cleanup = cleanup
7188 self.live = False # will be overridden later
7189 self.failover = failover
7190 self.fallback = fallback
7191 self.ignore_consistency = ignore_consistency
7192 self.shutdown_timeout = shutdown_timeout
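  # Illustrative construction (argument values are examples only):
  # LUInstanceFailover and LUInstanceMigrate above build this tasklet with
  # different flags -- a failover implies a stop/start, while a migration
  # may only fall back to one if allowed.
  #   TLMigrateInstance(lu, instance_name, failover=True,
  #                     ignore_consistency=False, shutdown_timeout=120)
  #   TLMigrateInstance(lu, instance_name, cleanup=False, fallback=True)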
7194 def CheckPrereq(self):
7195 """Check prerequisites.
7197 This checks that the instance is in the cluster.
7200 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7201 instance = self.cfg.GetInstanceInfo(instance_name)
7202 assert instance is not None
7203 self.instance = instance
7205 if (not self.cleanup and not instance.admin_up and not self.failover and
7207 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7209 self.failover = True
7211 if instance.disk_template not in constants.DTS_MIRRORED:
7216 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7217 " %s" % (instance.disk_template, text),
7220 if instance.disk_template in constants.DTS_EXT_MIRROR:
7221 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7223 if self.lu.op.iallocator:
7224 self._RunAllocator()
7226 # We set self.target_node as it is required by
7228 self.target_node = self.lu.op.target_node
7230 # self.target_node is already populated, either directly or by the
7232 target_node = self.target_node
7233 if self.target_node == instance.primary_node:
7234 raise errors.OpPrereqError("Cannot migrate instance %s"
7235 " to its primary (%s)" %
7236 (instance.name, instance.primary_node))
7238 if len(self.lu.tasklets) == 1:
7239 # It is safe to release locks only when we're the only tasklet
7241 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7242 keep=[instance.primary_node, self.target_node])
7245 secondary_nodes = instance.secondary_nodes
7246 if not secondary_nodes:
7247 raise errors.ConfigurationError("No secondary node but using"
7248 " %s disk template" %
7249 instance.disk_template)
7250 target_node = secondary_nodes[0]
7251 if self.lu.op.iallocator or (self.lu.op.target_node and
7252 self.lu.op.target_node != target_node):
7254 text = "failed over"
7257 raise errors.OpPrereqError("Instances with disk template %s cannot"
7258 " be %s to arbitrary nodes"
7259 " (neither an iallocator nor a target"
7260 " node can be passed)" %
7261 (instance.disk_template, text),
7264 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7266 # check memory requirements on the secondary node
7267 if not self.failover or instance.admin_up:
7268 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7269 instance.name, i_be[constants.BE_MEMORY],
7270 instance.hypervisor)
7272 self.lu.LogInfo("Not checking memory on the secondary node as"
7273 " instance will not be started")
7275 # check bridge existence
7276 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7278 if not self.cleanup:
7279 _CheckNodeNotDrained(self.lu, target_node)
7280 if not self.failover:
7281 result = self.rpc.call_instance_migratable(instance.primary_node,
7283 if result.fail_msg and self.fallback:
7284 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7286 self.failover = True
7288 result.Raise("Can't migrate, please use failover",
7289 prereq=True, ecode=errors.ECODE_STATE)
7291 assert not (self.failover and self.cleanup)
7293 if not self.failover:
7294 if self.lu.op.live is not None and self.lu.op.mode is not None:
7295 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7296 " parameters are accepted",
7298 if self.lu.op.live is not None:
7300 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7302 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7303 # reset the 'live' parameter to None so that repeated
7304 # invocations of CheckPrereq do not raise an exception
7305 self.lu.op.live = None
7306 elif self.lu.op.mode is None:
7307 # read the default value from the hypervisor
7308 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7310 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7312 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7314 # Failover is never live
7317 def _RunAllocator(self):
7318 """Run the allocator based on input opcode.
7321 ial = IAllocator(self.cfg, self.rpc,
7322 mode=constants.IALLOCATOR_MODE_RELOC,
7323 name=self.instance_name,
7324 # TODO See why hail breaks with a single node below
7325 relocate_from=[self.instance.primary_node,
7326 self.instance.primary_node],
7329 ial.Run(self.lu.op.iallocator)
7332 raise errors.OpPrereqError("Can't compute nodes using"
7333 " iallocator '%s': %s" %
7334 (self.lu.op.iallocator, ial.info),
7336 if len(ial.result) != ial.required_nodes:
7337 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7338 " of nodes (%s), required %s" %
7339 (self.lu.op.iallocator, len(ial.result),
7340 ial.required_nodes), errors.ECODE_FAULT)
7341 self.target_node = ial.result[0]
7342 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7343 self.instance_name, self.lu.op.iallocator,
7344 utils.CommaJoin(ial.result))
7346 def _WaitUntilSync(self):
7347 """Poll with custom rpc for disk sync.
7349 This uses our own step-based rpc call.
7352 self.feedback_fn("* wait until resync is done")
7356 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7358 self.instance.disks)
7360 for node, nres in result.items():
7361 nres.Raise("Cannot resync disks on node %s" % node)
7362 node_done, node_percent = nres.payload
7363 all_done = all_done and node_done
7364 if node_percent is not None:
7365 min_percent = min(min_percent, node_percent)
7367 if min_percent < 100:
7368 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7371 def _EnsureSecondary(self, node):
7372 """Demote a node to secondary.
7375 self.feedback_fn("* switching node %s to secondary mode" % node)
7377 for dev in self.instance.disks:
7378 self.cfg.SetDiskID(dev, node)
7380 result = self.rpc.call_blockdev_close(node, self.instance.name,
7381 self.instance.disks)
7382 result.Raise("Cannot change disk to secondary on node %s" % node)
7384 def _GoStandalone(self):
7385 """Disconnect from the network.
7388 self.feedback_fn("* changing into standalone mode")
7389 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7390 self.instance.disks)
7391 for node, nres in result.items():
7392 nres.Raise("Cannot disconnect disks on node %s" % node)
7394 def _GoReconnect(self, multimaster):
7395 """Reconnect to the network.
7401 msg = "single-master"
7402 self.feedback_fn("* changing disks into %s mode" % msg)
7403 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7404 self.instance.disks,
7405 self.instance.name, multimaster)
7406 for node, nres in result.items():
7407 nres.Raise("Cannot change disks config on node %s" % node)
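  # Illustrative sequence (see _ExecMigration and _ExecCleanup below): a DRBD
  # migration reconnects the disks in dual-master mode for the transfer and
  # returns them to single-master once the new primary is active.
  #   self._GoStandalone()
  #   self._GoReconnect(True)   # multimaster during the live migration
  #   ...
  #   self._GoReconnect(False)  # back to single-master afterwards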
7409 def _ExecCleanup(self):
7410 """Try to cleanup after a failed migration.
7412 The cleanup is done by:
7413 - check that the instance is running only on one node
7414 (and update the config if needed)
7415 - change disks on its secondary node to secondary
7416 - wait until disks are fully synchronized
7417 - disconnect from the network
7418 - change disks into single-master mode
7419 - wait again until disks are fully synchronized
7422 instance = self.instance
7423 target_node = self.target_node
7424 source_node = self.source_node
7426 # check running on only one node
7427 self.feedback_fn("* checking where the instance actually runs"
7428 " (if this hangs, the hypervisor might be in"
7430 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7431 for node, result in ins_l.items():
7432 result.Raise("Can't contact node %s" % node)
7434 runningon_source = instance.name in ins_l[source_node].payload
7435 runningon_target = instance.name in ins_l[target_node].payload
7437 if runningon_source and runningon_target:
7438 raise errors.OpExecError("Instance seems to be running on two nodes,"
7439 " or the hypervisor is confused; you will have"
7440 " to ensure manually that it runs only on one"
7441 " and restart this operation")
7443 if not (runningon_source or runningon_target):
7444 raise errors.OpExecError("Instance does not seem to be running at all;"
7445 " in this case it's safer to repair by"
7446 " running 'gnt-instance stop' to ensure disk"
7447 " shutdown, and then restarting it")
7449 if runningon_target:
7450 # the migration has actually succeeded, we need to update the config
7451 self.feedback_fn("* instance running on secondary node (%s),"
7452 " updating config" % target_node)
7453 instance.primary_node = target_node
7454 self.cfg.Update(instance, self.feedback_fn)
7455 demoted_node = source_node
7457 self.feedback_fn("* instance confirmed to be running on its"
7458 " primary node (%s)" % source_node)
7459 demoted_node = target_node
7461 if instance.disk_template in constants.DTS_INT_MIRROR:
7462 self._EnsureSecondary(demoted_node)
7464 self._WaitUntilSync()
7465 except errors.OpExecError:
7466 # we ignore here errors, since if the device is standalone, it
7467 # won't be able to sync
7469 self._GoStandalone()
7470 self._GoReconnect(False)
7471 self._WaitUntilSync()
7473 self.feedback_fn("* done")
7475 def _RevertDiskStatus(self):
7476 """Try to revert the disk status after a failed migration.
7479 target_node = self.target_node
7480 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7484 self._EnsureSecondary(target_node)
7485 self._GoStandalone()
7486 self._GoReconnect(False)
7487 self._WaitUntilSync()
7488 except errors.OpExecError, err:
7489 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7490 " please try to recover the instance manually;"
7491 " error '%s'" % str(err))
7493 def _AbortMigration(self):
7494 """Call the hypervisor code to abort a started migration.
7497 instance = self.instance
7498 target_node = self.target_node
7499 source_node = self.source_node
7500 migration_info = self.migration_info
7502 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7506 abort_msg = abort_result.fail_msg
7508 logging.error("Aborting migration failed on target node %s: %s",
7509 target_node, abort_msg)
7510 # Don't raise an exception here, as we still have to try to revert the
7511 # disk status, even if this step failed.
7513 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7514 instance, False, self.live)
7515 abort_msg = abort_result.fail_msg
7517 logging.error("Aborting migration failed on source node %s: %s",
7518 source_node, abort_msg)
7520 def _ExecMigration(self):
7521 """Migrate an instance.
7523 The migration is done by:
7524 - change the disks into dual-master mode
7525 - wait until disks are fully synchronized again
7526 - migrate the instance
7527 - change disks on the new secondary node (the old primary) to secondary
7528 - wait until disks are fully synchronized
7529 - change disks into single-master mode
7532 instance = self.instance
7533 target_node = self.target_node
7534 source_node = self.source_node
7536 # Check for hypervisor version mismatch and warn the user.
7537 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7538 None, self.instance.hypervisor)
7539 src_info = nodeinfo[source_node]
7540 dst_info = nodeinfo[target_node]
7542 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7543 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7544 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7545 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7546 if src_version != dst_version:
7547 self.feedback_fn("* warning: hypervisor version mismatch between"
7548 " source (%s) and target (%s) node" %
7549 (src_version, dst_version))
7551 self.feedback_fn("* checking disk consistency between source and target")
7552 for dev in instance.disks:
7553 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7554 raise errors.OpExecError("Disk %s is degraded or not fully"
7555 " synchronized on target node,"
7556 " aborting migration" % dev.iv_name)
7558 # First get the migration information from the remote node
7559 result = self.rpc.call_migration_info(source_node, instance)
7560 msg = result.fail_msg
7562 log_err = ("Failed fetching source migration information from %s: %s" %
7564 logging.error(log_err)
7565 raise errors.OpExecError(log_err)
7567 self.migration_info = migration_info = result.payload
7569 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7570 # Then switch the disks to master/master mode
7571 self._EnsureSecondary(target_node)
7572 self._GoStandalone()
7573 self._GoReconnect(True)
7574 self._WaitUntilSync()
7576 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7577 result = self.rpc.call_accept_instance(target_node,
7580 self.nodes_ip[target_node])
7582 msg = result.fail_msg
7584 logging.error("Instance pre-migration failed, trying to revert"
7585 " disk status: %s", msg)
7586 self.feedback_fn("Pre-migration failed, aborting")
7587 self._AbortMigration()
7588 self._RevertDiskStatus()
7589 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7590 (instance.name, msg))
7592 self.feedback_fn("* migrating instance to %s" % target_node)
7593 result = self.rpc.call_instance_migrate(source_node, instance,
7594 self.nodes_ip[target_node],
7596 msg = result.fail_msg
7598 logging.error("Instance migration failed, trying to revert"
7599 " disk status: %s", msg)
7600 self.feedback_fn("Migration failed, aborting")
7601 self._AbortMigration()
7602 self._RevertDiskStatus()
7603 raise errors.OpExecError("Could not migrate instance %s: %s" %
7604 (instance.name, msg))
7606 self.feedback_fn("* starting memory transfer")
7607 last_feedback = time.time()
7609 result = self.rpc.call_instance_get_migration_status(source_node,
7611 msg = result.fail_msg
7612 ms = result.payload # MigrationStatus instance
7613 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7614 logging.error("Instance migration failed, trying to revert"
7615 " disk status: %s", msg)
7616 self.feedback_fn("Migration failed, aborting")
7617 self._AbortMigration()
7618 self._RevertDiskStatus()
7619 raise errors.OpExecError("Could not migrate instance %s: %s" %
7620 (instance.name, msg))
7622 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7623 self.feedback_fn("* memory transfer complete")
7626 if (utils.TimeoutExpired(last_feedback,
7627 self._MIGRATION_FEEDBACK_INTERVAL) and
7628 ms.transferred_ram is not None):
7629 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7630 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7631 last_feedback = time.time()
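# Illustrative arithmetic (hypothetical values): with ms.transferred_ram of
# 1536 MiB out of ms.total_ram of 2048 MiB, the feedback above would read
# "* memory transfer progress: 75.00 %" (100 * 1536 / 2048).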
7633 time.sleep(self._MIGRATION_POLL_INTERVAL)
7635 result = self.rpc.call_instance_finalize_migration_src(source_node,
7639 msg = result.fail_msg
7641 logging.error("Instance migration succeeded, but finalization failed"
7642 " on the source node: %s", msg)
7643 raise errors.OpExecError("Could not finalize instance migration: %s" %
7646 instance.primary_node = target_node
7648 # distribute new instance config to the other nodes
7649 self.cfg.Update(instance, self.feedback_fn)
7651 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7655 msg = result.fail_msg
7657 logging.error("Instance migration succeeded, but finalization failed"
7658 " on the target node: %s", msg)
7659 raise errors.OpExecError("Could not finalize instance migration: %s" %
7662 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7663 self._EnsureSecondary(source_node)
7664 self._WaitUntilSync()
7665 self._GoStandalone()
7666 self._GoReconnect(False)
7667 self._WaitUntilSync()
7669 self.feedback_fn("* done")
7671 def _ExecFailover(self):
7672 """Failover an instance.
7674 The failover is done by shutting it down on its present node and
7675 starting it on the secondary.
7678 instance = self.instance
7679 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7681 source_node = instance.primary_node
7682 target_node = self.target_node
7684 if instance.admin_up:
7685 self.feedback_fn("* checking disk consistency between source and target")
7686 for dev in instance.disks:
7687 # for drbd, these are drbd over lvm
7688 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7689 if primary_node.offline:
7690 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7692 (primary_node.name, dev.iv_name, target_node))
7693 elif not self.ignore_consistency:
7694 raise errors.OpExecError("Disk %s is degraded on target node,"
7695 " aborting failover" % dev.iv_name)
7697 self.feedback_fn("* not checking disk consistency as instance is not"
7700 self.feedback_fn("* shutting down instance on source node")
7701 logging.info("Shutting down instance %s on node %s",
7702 instance.name, source_node)
7704 result = self.rpc.call_instance_shutdown(source_node, instance,
7705 self.shutdown_timeout)
7706 msg = result.fail_msg
7708 if self.ignore_consistency or primary_node.offline:
7709 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7710 " proceeding anyway; please make sure node"
7711 " %s is down; error details: %s",
7712 instance.name, source_node, source_node, msg)
7714 raise errors.OpExecError("Could not shutdown instance %s on"
7716 (instance.name, source_node, msg))
7718 self.feedback_fn("* deactivating the instance's disks on source node")
7719 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7720 raise errors.OpExecError("Can't shut down the instance's disks")
7722 instance.primary_node = target_node
7723 # distribute new instance config to the other nodes
7724 self.cfg.Update(instance, self.feedback_fn)
7726 # Only start the instance if it's marked as up
7727 if instance.admin_up:
7728 self.feedback_fn("* activating the instance's disks on target node %s" %
7730 logging.info("Starting instance %s on node %s",
7731 instance.name, target_node)
7733 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7734 ignore_secondaries=True)
7736 _ShutdownInstanceDisks(self.lu, instance)
7737 raise errors.OpExecError("Can't activate the instance's disks")
7739 self.feedback_fn("* starting the instance on the target node %s" %
7741 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7743 msg = result.fail_msg
7745 _ShutdownInstanceDisks(self.lu, instance)
7746 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7747 (instance.name, target_node, msg))
7749 def Exec(self, feedback_fn):
7750 """Perform the migration.
7753 self.feedback_fn = feedback_fn
7754 self.source_node = self.instance.primary_node
7756 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7757 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7758 self.target_node = self.instance.secondary_nodes[0]
7759 # Otherwise self.target_node has been populated either
7760 # directly, or through an iallocator.
7762 self.all_nodes = [self.source_node, self.target_node]
7763 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7764 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7767 feedback_fn("Failover instance %s" % self.instance.name)
7768 self._ExecFailover()
7770 feedback_fn("Migrating instance %s" % self.instance.name)
7773 return self._ExecCleanup()
7775 return self._ExecMigration()
7778 def _CreateBlockDev(lu, node, instance, device, force_create,
7780 """Create a tree of block devices on a given node.
7782 If this device type has to be created on secondaries, create it and all its children.
7785 If not, just recurse to children keeping the same 'force' value.
7787 @param lu: the lu on whose behalf we execute
7788 @param node: the node on which to create the device
7789 @type instance: L{objects.Instance}
7790 @param instance: the instance which owns the device
7791 @type device: L{objects.Disk}
7792 @param device: the device to create
7793 @type force_create: boolean
7794 @param force_create: whether to force creation of this device; this
7795 will be changed to True whenever we find a device which has the
7796 CreateOnSecondary() attribute set
7797 @param info: the extra 'metadata' we should attach to the device
7798 (this will be represented as an LVM tag)
7799 @type force_open: boolean
7800 @param force_open: this parameter will be passed to the
7801 L{backend.BlockdevCreate} function where it specifies
7802 whether we run on primary or not, and it affects both
7803 the child assembly and the device's own Open() execution
7806 if device.CreateOnSecondary():
7810 for child in device.children:
7811 _CreateBlockDev(lu, node, instance, child, force_create,
7814 if not force_create:
7817 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7820 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7821 """Create a single block device on a given node.
7823 This will not recurse over children of the device, so they must be
7826 @param lu: the lu on whose behalf we execute
7827 @param node: the node on which to create the device
7828 @type instance: L{objects.Instance}
7829 @param instance: the instance which owns the device
7830 @type device: L{objects.Disk}
7831 @param device: the device to create
7832 @param info: the extra 'metadata' we should attach to the device
7833 (this will be represented as an LVM tag)
7834 @type force_open: boolean
7835 @param force_open: this parameter will be passed to the
7836 L{backend.BlockdevCreate} function where it specifies
7837 whether we run on primary or not, and it affects both
7838 the child assembly and the device's own Open() execution
7841 lu.cfg.SetDiskID(device, node)
7842 result = lu.rpc.call_blockdev_create(node, device, device.size,
7843 instance.name, force_open, info)
7844 result.Raise("Can't create block device %s on"
7845 " node %s for instance %s" % (device, node, instance.name))
7846 if device.physical_id is None:
7847 device.physical_id = result.payload
7850 def _GenerateUniqueNames(lu, exts):
7851 """Generate a suitable LV name.
7853 This will generate a logical volume name for the given instance.
7858 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7859 results.append("%s%s" % (new_id, val))
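# Illustrative result (hypothetical IDs): _GenerateUniqueNames(lu, [".disk0",
# ".disk1"]) could return ["<uuid-a>.disk0", "<uuid-b>.disk1"], i.e. a freshly
# generated unique ID prefixed to each of the given extensions.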
7863 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7864 iv_name, p_minor, s_minor):
7865 """Generate a drbd8 device complete with its children.
7868 assert len(vgnames) == len(names) == 2
7869 port = lu.cfg.AllocatePort()
7870 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7871 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7872 logical_id=(vgnames[0], names[0]))
7873 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7874 logical_id=(vgnames[1], names[1]))
7875 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7876 logical_id=(primary, secondary, port,
7879 children=[dev_data, dev_meta],
7884 def _GenerateDiskTemplate(lu, template_name,
7885 instance_name, primary_node,
7886 secondary_nodes, disk_info,
7887 file_storage_dir, file_driver,
7888 base_index, feedback_fn):
7889 """Generate the entire disk layout for a given template type.
7892 #TODO: compute space requirements
7894 vgname = lu.cfg.GetVGName()
7895 disk_count = len(disk_info)
7897 if template_name == constants.DT_DISKLESS:
7899 elif template_name == constants.DT_PLAIN:
7900 if len(secondary_nodes) != 0:
7901 raise errors.ProgrammerError("Wrong template configuration")
7903 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7904 for i in range(disk_count)])
7905 for idx, disk in enumerate(disk_info):
7906 disk_index = idx + base_index
7907 vg = disk.get(constants.IDISK_VG, vgname)
7908 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7909 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7910 size=disk[constants.IDISK_SIZE],
7911 logical_id=(vg, names[idx]),
7912 iv_name="disk/%d" % disk_index,
7913 mode=disk[constants.IDISK_MODE])
7914 disks.append(disk_dev)
7915 elif template_name == constants.DT_DRBD8:
7916 if len(secondary_nodes) != 1:
7917 raise errors.ProgrammerError("Wrong template configuration")
7918 remote_node = secondary_nodes[0]
7919 minors = lu.cfg.AllocateDRBDMinor(
7920 [primary_node, remote_node] * len(disk_info), instance_name)
7923 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7924 for i in range(disk_count)]):
7925 names.append(lv_prefix + "_data")
7926 names.append(lv_prefix + "_meta")
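# e.g. (hypothetical prefix): a generated prefix "<uuid>.disk0" yields the two
# LV names "<uuid>.disk0_data" and "<uuid>.disk0_meta" consumed below.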
7927 for idx, disk in enumerate(disk_info):
7928 disk_index = idx + base_index
7929 data_vg = disk.get(constants.IDISK_VG, vgname)
7930 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7931 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7932 disk[constants.IDISK_SIZE],
7934 names[idx * 2:idx * 2 + 2],
7935 "disk/%d" % disk_index,
7936 minors[idx * 2], minors[idx * 2 + 1])
7937 disk_dev.mode = disk[constants.IDISK_MODE]
7938 disks.append(disk_dev)
7939 elif template_name == constants.DT_FILE:
7940 if len(secondary_nodes) != 0:
7941 raise errors.ProgrammerError("Wrong template configuration")
7943 opcodes.RequireFileStorage()
7945 for idx, disk in enumerate(disk_info):
7946 disk_index = idx + base_index
7947 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7948 size=disk[constants.IDISK_SIZE],
7949 iv_name="disk/%d" % disk_index,
7950 logical_id=(file_driver,
7951 "%s/disk%d" % (file_storage_dir,
7953 mode=disk[constants.IDISK_MODE])
7954 disks.append(disk_dev)
7955 elif template_name == constants.DT_SHARED_FILE:
7956 if len(secondary_nodes) != 0:
7957 raise errors.ProgrammerError("Wrong template configuration")
7959 opcodes.RequireSharedFileStorage()
7961 for idx, disk in enumerate(disk_info):
7962 disk_index = idx + base_index
7963 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7964 size=disk[constants.IDISK_SIZE],
7965 iv_name="disk/%d" % disk_index,
7966 logical_id=(file_driver,
7967 "%s/disk%d" % (file_storage_dir,
7969 mode=disk[constants.IDISK_MODE])
7970 disks.append(disk_dev)
7971 elif template_name == constants.DT_BLOCK:
7972 if len(secondary_nodes) != 0:
7973 raise errors.ProgrammerError("Wrong template configuration")
7975 for idx, disk in enumerate(disk_info):
7976 disk_index = idx + base_index
7977 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7978 size=disk[constants.IDISK_SIZE],
7979 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7980 disk[constants.IDISK_ADOPT]),
7981 iv_name="disk/%d" % disk_index,
7982 mode=disk[constants.IDISK_MODE])
7983 disks.append(disk_dev)
7986 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7990 def _GetInstanceInfoText(instance):
7991 """Compute that text that should be added to the disk's metadata.
7994 return "originstname+%s" % instance.name
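# e.g. for an instance named "web1.example.com" (hypothetical), the text
# attached to its disks (as an LVM tag) is "originstname+web1.example.com".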
7997 def _CalcEta(time_taken, written, total_size):
7998 """Calculates the ETA based on size written and total size.
8000 @param time_taken: The time taken so far
8001 @param written: amount written so far
8002 @param total_size: The total size of data to be written
8003 @return: The remaining time in seconds
8006 avg_time = time_taken / float(written)
8007 return (total_size - written) * avg_time
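# Worked example (hypothetical numbers): _CalcEta(30, 512, 2048) computes
# avg_time = 30 / 512.0 and returns (2048 - 512) * (30 / 512.0) == 90.0,
# i.e. roughly 90 seconds left after writing 512 of 2048 units in 30 seconds.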
8010 def _WipeDisks(lu, instance):
8011 """Wipes instance disks.
8013 @type lu: L{LogicalUnit}
8014 @param lu: the logical unit on whose behalf we execute
8015 @type instance: L{objects.Instance}
8016 @param instance: the instance whose disks we should wipe
8017 @return: the success of the wipe
8020 node = instance.primary_node
8022 for device in instance.disks:
8023 lu.cfg.SetDiskID(device, node)
8025 logging.info("Pause sync of instance %s disks", instance.name)
8026 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8028 for idx, success in enumerate(result.payload):
8030 logging.warn("pause-sync of instance %s for disk %d failed",
8034 for idx, device in enumerate(instance.disks):
8035 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8036 # but at most MAX_WIPE_CHUNK
8037 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8038 constants.MIN_WIPE_CHUNK_PERCENT)
8039 # we _must_ make this an int, otherwise rounding errors will
8041 wipe_chunk_size = int(wipe_chunk_size)
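# Illustrative sizing (assuming, e.g., MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB): a 20480 MiB disk yields 10% == 2048 MiB, capped
# to 1024 MiB, while a 5120 MiB disk yields 512 MiB, which is used as-is.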
8043 lu.LogInfo("* Wiping disk %d", idx)
8044 logging.info("Wiping disk %d for instance %s, node %s using"
8045 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8050 start_time = time.time()
8052 while offset < size:
8053 wipe_size = min(wipe_chunk_size, size - offset)
8054 logging.debug("Wiping disk %d, offset %s, chunk %s",
8055 idx, offset, wipe_size)
8056 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8057 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8058 (idx, offset, wipe_size))
8061 if now - last_output >= 60:
8062 eta = _CalcEta(now - start_time, offset, size)
8063 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8064 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8067 logging.info("Resume sync of instance %s disks", instance.name)
8069 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8071 for idx, success in enumerate(result.payload):
8073 lu.LogWarning("Resume sync of disk %d failed, please have a"
8074 " look at the status and troubleshoot the issue", idx)
8075 logging.warn("resume-sync of instance %s for disk %d failed",
8079 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8080 """Create all disks for an instance.
8082 This abstracts away some work from AddInstance.
8084 @type lu: L{LogicalUnit}
8085 @param lu: the logical unit on whose behalf we execute
8086 @type instance: L{objects.Instance}
8087 @param instance: the instance whose disks we should create
8089 @param to_skip: list of indices to skip
8090 @type target_node: string
8091 @param target_node: if passed, overrides the target node for creation
8093 @return: the success of the creation
8096 info = _GetInstanceInfoText(instance)
8097 if target_node is None:
8098 pnode = instance.primary_node
8099 all_nodes = instance.all_nodes
8104 if instance.disk_template in constants.DTS_FILEBASED:
8105 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8106 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8108 result.Raise("Failed to create directory '%s' on"
8109 " node %s" % (file_storage_dir, pnode))
8111 # Note: this needs to be kept in sync with adding of disks in
8112 # LUInstanceSetParams
8113 for idx, device in enumerate(instance.disks):
8114 if to_skip and idx in to_skip:
8116 logging.info("Creating volume %s for instance %s",
8117 device.iv_name, instance.name)
8119 for node in all_nodes:
8120 f_create = node == pnode
8121 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8124 def _RemoveDisks(lu, instance, target_node=None):
8125 """Remove all disks for an instance.
8127 This abstracts away some work from `AddInstance()` and
8128 `RemoveInstance()`. Note that in case some of the devices couldn't
8129 be removed, the removal will continue with the other ones (compare
8130 with `_CreateDisks()`).
8132 @type lu: L{LogicalUnit}
8133 @param lu: the logical unit on whose behalf we execute
8134 @type instance: L{objects.Instance}
8135 @param instance: the instance whose disks we should remove
8136 @type target_node: string
8137 @param target_node: used to override the node on which to remove the disks
8139 @return: the success of the removal
8142 logging.info("Removing block devices for instance %s", instance.name)
8145 for device in instance.disks:
8147 edata = [(target_node, device)]
8149 edata = device.ComputeNodeTree(instance.primary_node)
8150 for node, disk in edata:
8151 lu.cfg.SetDiskID(disk, node)
8152 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8154 lu.LogWarning("Could not remove block device %s on node %s,"
8155 " continuing anyway: %s", device.iv_name, node, msg)
8158 if instance.disk_template == constants.DT_FILE:
8159 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8163 tgt = instance.primary_node
8164 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8166 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8167 file_storage_dir, instance.primary_node, result.fail_msg)
8173 def _ComputeDiskSizePerVG(disk_template, disks):
8174 """Compute disk size requirements in the volume group
8177 def _compute(disks, payload):
8178 """Universal algorithm.
8183 vgs[disk[constants.IDISK_VG]] = \
8184 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8188 # Required free disk space as a function of disk and swap space
8190 constants.DT_DISKLESS: {},
8191 constants.DT_PLAIN: _compute(disks, 0),
8192 # 128 MB are added for drbd metadata for each disk
8193 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8194 constants.DT_FILE: {},
8195 constants.DT_SHARED_FILE: {},
8198 if disk_template not in req_size_dict:
8199 raise errors.ProgrammerError("Disk template '%s' size requirement"
8200 " is unknown" % disk_template)
8202 return req_size_dict[disk_template]
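# Illustrative result (hypothetical disks): for DT_DRBD8 with one 1024 MiB disk
# on VG "xenvg" and one 2048 MiB disk on VG "fastvg", _ComputeDiskSizePerVG
# returns {"xenvg": 1024 + DRBD_META_SIZE, "fastvg": 2048 + DRBD_META_SIZE}.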
8205 def _ComputeDiskSize(disk_template, disks):
8206 """Compute disk size requirements in the volume group
8209 # Required free disk space as a function of disk and swap space
8211 constants.DT_DISKLESS: None,
8212 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8213 # 128 MB are added for drbd metadata for each disk
8215 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8216 constants.DT_FILE: None,
8217 constants.DT_SHARED_FILE: 0,
8218 constants.DT_BLOCK: 0,
8221 if disk_template not in req_size_dict:
8222 raise errors.ProgrammerError("Disk template '%s' size requirement"
8223 " is unknown" % disk_template)
8225 return req_size_dict[disk_template]
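# Illustrative result (same hypothetical disks as above): for DT_DRBD8 the
# aggregate requirement is (1024 + DRBD_META_SIZE) + (2048 + DRBD_META_SIZE),
# whereas DT_PLAIN would simply need 1024 + 2048 MiB.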
8228 def _FilterVmNodes(lu, nodenames):
8229 """Filters out non-vm_capable nodes from a list.
8231 @type lu: L{LogicalUnit}
8232 @param lu: the logical unit for which we check
8233 @type nodenames: list
8234 @param nodenames: the list of nodes on which we should check
8236 @return: the list of vm-capable nodes
8239 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8240 return [name for name in nodenames if name not in vm_nodes]
8243 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8244 """Hypervisor parameter validation.
8246 This function abstracts the hypervisor parameter validation to be
8247 used in both instance create and instance modify.
8249 @type lu: L{LogicalUnit}
8250 @param lu: the logical unit for which we check
8251 @type nodenames: list
8252 @param nodenames: the list of nodes on which we should check
8253 @type hvname: string
8254 @param hvname: the name of the hypervisor we should use
8255 @type hvparams: dict
8256 @param hvparams: the parameters which we need to check
8257 @raise errors.OpPrereqError: if the parameters are not valid
8260 nodenames = _FilterVmNodes(lu, nodenames)
8262 cluster = lu.cfg.GetClusterInfo()
8263 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8265 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8266 for node in nodenames:
8270 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8273 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8274 """OS parameters validation.
8276 @type lu: L{LogicalUnit}
8277 @param lu: the logical unit for which we check
8278 @type required: boolean
8279 @param required: whether the validation should fail if the OS is not
8281 @type nodenames: list
8282 @param nodenames: the list of nodes on which we should check
8283 @type osname: string
8284 @param osname: the name of the OS we should use
8285 @type osparams: dict
8286 @param osparams: the parameters which we need to check
8287 @raise errors.OpPrereqError: if the parameters are not valid
8290 nodenames = _FilterVmNodes(lu, nodenames)
8291 result = lu.rpc.call_os_validate(nodenames, required, osname,
8292 [constants.OS_VALIDATE_PARAMETERS],
8294 for node, nres in result.items():
8295 # we don't check for offline cases since this should be run only
8296 # against the master node and/or an instance's nodes
8297 nres.Raise("OS Parameters validation failed on node %s" % node)
8298 if not nres.payload:
8299 lu.LogInfo("OS %s not found on node %s, validation skipped",
8303 class LUInstanceCreate(LogicalUnit):
8304 """Create an instance.
8307 HPATH = "instance-add"
8308 HTYPE = constants.HTYPE_INSTANCE
8311 def CheckArguments(self):
8315 # do not require name_check to ease forward/backward compatibility
8317 if self.op.no_install and self.op.start:
8318 self.LogInfo("No-installation mode selected, disabling startup")
8319 self.op.start = False
8320 # validate/normalize the instance name
8321 self.op.instance_name = \
8322 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8324 if self.op.ip_check and not self.op.name_check:
8325 # TODO: make the ip check more flexible and not depend on the name check
8326 raise errors.OpPrereqError("Cannot do IP address check without a name"
8327 " check", errors.ECODE_INVAL)
8329 # check nics' parameter names
8330 for nic in self.op.nics:
8331 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8333 # check disks' parameter names and consistent adopt/no-adopt strategy
8334 has_adopt = has_no_adopt = False
8335 for disk in self.op.disks:
8336 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8337 if constants.IDISK_ADOPT in disk:
8341 if has_adopt and has_no_adopt:
8342 raise errors.OpPrereqError("Either all disks are adopted or none is",
8345 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8346 raise errors.OpPrereqError("Disk adoption is not supported for the"
8347 " '%s' disk template" %
8348 self.op.disk_template,
8350 if self.op.iallocator is not None:
8351 raise errors.OpPrereqError("Disk adoption not allowed with an"
8352 " iallocator script", errors.ECODE_INVAL)
8353 if self.op.mode == constants.INSTANCE_IMPORT:
8354 raise errors.OpPrereqError("Disk adoption not allowed for"
8355 " instance import", errors.ECODE_INVAL)
8357 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8358 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8359 " but no 'adopt' parameter given" %
8360 self.op.disk_template,
8363 self.adopt_disks = has_adopt
8365 # instance name verification
8366 if self.op.name_check:
8367 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8368 self.op.instance_name = self.hostname1.name
8369 # used in CheckPrereq for ip ping check
8370 self.check_ip = self.hostname1.ip
8372 self.check_ip = None
8374 # file storage checks
8375 if (self.op.file_driver and
8376 not self.op.file_driver in constants.FILE_DRIVER):
8377 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8378 self.op.file_driver, errors.ECODE_INVAL)
8380 if self.op.disk_template == constants.DT_FILE:
8381 opcodes.RequireFileStorage()
8382 elif self.op.disk_template == constants.DT_SHARED_FILE:
8383 opcodes.RequireSharedFileStorage()
8385 ### Node/iallocator related checks
8386 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8388 if self.op.pnode is not None:
8389 if self.op.disk_template in constants.DTS_INT_MIRROR:
8390 if self.op.snode is None:
8391 raise errors.OpPrereqError("The networked disk templates need"
8392 " a mirror node", errors.ECODE_INVAL)
8394 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8396 self.op.snode = None
8398 self._cds = _GetClusterDomainSecret()
8400 if self.op.mode == constants.INSTANCE_IMPORT:
8401 # On import force_variant must be True, because if we forced it at
8402 # initial install, our only chance when importing it back is that it
8404 self.op.force_variant = True
8406 if self.op.no_install:
8407 self.LogInfo("No-installation mode has no effect during import")
8409 elif self.op.mode == constants.INSTANCE_CREATE:
8410 if self.op.os_type is None:
8411 raise errors.OpPrereqError("No guest OS specified",
8413 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8414 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8415 " installation" % self.op.os_type,
8417 if self.op.disk_template is None:
8418 raise errors.OpPrereqError("No disk template specified",
8421 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8422 # Check handshake to ensure both clusters have the same domain secret
8423 src_handshake = self.op.source_handshake
8424 if not src_handshake:
8425 raise errors.OpPrereqError("Missing source handshake",
8428 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8431 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8434 # Load and check source CA
8435 self.source_x509_ca_pem = self.op.source_x509_ca
8436 if not self.source_x509_ca_pem:
8437 raise errors.OpPrereqError("Missing source X509 CA",
8441 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8443 except OpenSSL.crypto.Error, err:
8444 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8445 (err, ), errors.ECODE_INVAL)
8447 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8448 if errcode is not None:
8449 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8452 self.source_x509_ca = cert
8454 src_instance_name = self.op.source_instance_name
8455 if not src_instance_name:
8456 raise errors.OpPrereqError("Missing source instance name",
8459 self.source_instance_name = \
8460 netutils.GetHostname(name=src_instance_name).name
8463 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8464 self.op.mode, errors.ECODE_INVAL)
8466 def ExpandNames(self):
8467 """ExpandNames for CreateInstance.
8469 Figure out the right locks for instance creation.
8472 self.needed_locks = {}
8474 instance_name = self.op.instance_name
8475 # this is just a preventive check, but someone might still add this
8476 # instance in the meantime, and creation will fail at lock-add time
8477 if instance_name in self.cfg.GetInstanceList():
8478 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8479 instance_name, errors.ECODE_EXISTS)
8481 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8483 if self.op.iallocator:
8484 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8486 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8487 nodelist = [self.op.pnode]
8488 if self.op.snode is not None:
8489 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8490 nodelist.append(self.op.snode)
8491 self.needed_locks[locking.LEVEL_NODE] = nodelist
8493 # in case of import lock the source node too
8494 if self.op.mode == constants.INSTANCE_IMPORT:
8495 src_node = self.op.src_node
8496 src_path = self.op.src_path
8498 if src_path is None:
8499 self.op.src_path = src_path = self.op.instance_name
8501 if src_node is None:
8502 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8503 self.op.src_node = None
8504 if os.path.isabs(src_path):
8505 raise errors.OpPrereqError("Importing an instance from a path"
8506 " requires a source node option",
8509 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8510 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8511 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8512 if not os.path.isabs(src_path):
8513 self.op.src_path = src_path = \
8514 utils.PathJoin(constants.EXPORT_DIR, src_path)
8516 def _RunAllocator(self):
8517 """Run the allocator based on input opcode.
8520 nics = [n.ToDict() for n in self.nics]
8521 ial = IAllocator(self.cfg, self.rpc,
8522 mode=constants.IALLOCATOR_MODE_ALLOC,
8523 name=self.op.instance_name,
8524 disk_template=self.op.disk_template,
8527 vcpus=self.be_full[constants.BE_VCPUS],
8528 memory=self.be_full[constants.BE_MEMORY],
8531 hypervisor=self.op.hypervisor,
8534 ial.Run(self.op.iallocator)
8537 raise errors.OpPrereqError("Can't compute nodes using"
8538 " iallocator '%s': %s" %
8539 (self.op.iallocator, ial.info),
8541 if len(ial.result) != ial.required_nodes:
8542 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8543 " of nodes (%s), required %s" %
8544 (self.op.iallocator, len(ial.result),
8545 ial.required_nodes), errors.ECODE_FAULT)
8546 self.op.pnode = ial.result[0]
8547 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8548 self.op.instance_name, self.op.iallocator,
8549 utils.CommaJoin(ial.result))
8550 if ial.required_nodes == 2:
8551 self.op.snode = ial.result[1]
8553 def BuildHooksEnv(self):
8556 This runs on master, primary and secondary nodes of the instance.
8560 "ADD_MODE": self.op.mode,
8562 if self.op.mode == constants.INSTANCE_IMPORT:
8563 env["SRC_NODE"] = self.op.src_node
8564 env["SRC_PATH"] = self.op.src_path
8565 env["SRC_IMAGES"] = self.src_images
8567 env.update(_BuildInstanceHookEnv(
8568 name=self.op.instance_name,
8569 primary_node=self.op.pnode,
8570 secondary_nodes=self.secondaries,
8571 status=self.op.start,
8572 os_type=self.op.os_type,
8573 memory=self.be_full[constants.BE_MEMORY],
8574 vcpus=self.be_full[constants.BE_VCPUS],
8575 nics=_NICListToTuple(self, self.nics),
8576 disk_template=self.op.disk_template,
8577 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8578 for d in self.disks],
8581 hypervisor_name=self.op.hypervisor,
8587 def BuildHooksNodes(self):
8588 """Build hooks nodes.
8591 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8594 def _ReadExportInfo(self):
8595 """Reads the export information from disk.
8597 It will override the opcode source node and path with the actual
8598 information, if these two were not specified before.
8600 @return: the export information
8603 assert self.op.mode == constants.INSTANCE_IMPORT
8605 src_node = self.op.src_node
8606 src_path = self.op.src_path
8608 if src_node is None:
8609 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8610 exp_list = self.rpc.call_export_list(locked_nodes)
8612 for node in exp_list:
8613 if exp_list[node].fail_msg:
8615 if src_path in exp_list[node].payload:
8617 self.op.src_node = src_node = node
8618 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8622 raise errors.OpPrereqError("No export found for relative path %s" %
8623 src_path, errors.ECODE_INVAL)
8625 _CheckNodeOnline(self, src_node)
8626 result = self.rpc.call_export_info(src_node, src_path)
8627 result.Raise("No export or invalid export found in dir %s" % src_path)
8629 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8630 if not export_info.has_section(constants.INISECT_EXP):
8631 raise errors.ProgrammerError("Corrupted export config",
8632 errors.ECODE_ENVIRON)
8634 ei_version = export_info.get(constants.INISECT_EXP, "version")
8635 if (int(ei_version) != constants.EXPORT_VERSION):
8636 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8637 (ei_version, constants.EXPORT_VERSION),
8638 errors.ECODE_ENVIRON)
8641 def _ReadExportParams(self, einfo):
8642 """Use export parameters as defaults.
8644 In case the opcode doesn't specify (as in override) some instance
8645 parameters, then try to use them from the export information, if
8649 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8651 if self.op.disk_template is None:
8652 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8653 self.op.disk_template = einfo.get(constants.INISECT_INS,
8655 if self.op.disk_template not in constants.DISK_TEMPLATES:
8656 raise errors.OpPrereqError("Disk template specified in configuration"
8657 " file is not one of the allowed values:"
8658 " %s" % " ".join(constants.DISK_TEMPLATES))
8660 raise errors.OpPrereqError("No disk template specified and the export"
8661 " is missing the disk_template information",
8664 if not self.op.disks:
8666 # TODO: import the disk iv_name too
8667 for idx in range(constants.MAX_DISKS):
8668 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8669 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8670 disks.append({constants.IDISK_SIZE: disk_sz})
8671 self.op.disks = disks
8672 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8673 raise errors.OpPrereqError("No disk info specified and the export"
8674 " is missing the disk information",
8677 if not self.op.nics:
8679 for idx in range(constants.MAX_NICS):
8680 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8682 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8683 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8690 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8691 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8693 if (self.op.hypervisor is None and
8694 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8695 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8697 if einfo.has_section(constants.INISECT_HYP):
8698 # use the export parameters but do not override the ones
8699 # specified by the user
8700 for name, value in einfo.items(constants.INISECT_HYP):
8701 if name not in self.op.hvparams:
8702 self.op.hvparams[name] = value
8704 if einfo.has_section(constants.INISECT_BEP):
8705 # use the parameters, without overriding
8706 for name, value in einfo.items(constants.INISECT_BEP):
8707 if name not in self.op.beparams:
8708 self.op.beparams[name] = value
8710 # try to read the parameters old style, from the main section
8711 for name in constants.BES_PARAMETERS:
8712 if (name not in self.op.beparams and
8713 einfo.has_option(constants.INISECT_INS, name)):
8714 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8716 if einfo.has_section(constants.INISECT_OSP):
8717 # use the parameters, without overriding
8718 for name, value in einfo.items(constants.INISECT_OSP):
8719 if name not in self.op.osparams:
8720 self.op.osparams[name] = value
8722 def _RevertToDefaults(self, cluster):
8723 """Revert the instance parameters to the default values.
8727 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8728 for name in self.op.hvparams.keys():
8729 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8730 del self.op.hvparams[name]
8732 be_defs = cluster.SimpleFillBE({})
8733 for name in self.op.beparams.keys():
8734 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8735 del self.op.beparams[name]
8737 nic_defs = cluster.SimpleFillNIC({})
8738 for nic in self.op.nics:
8739 for name in constants.NICS_PARAMETERS:
8740 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8743 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8744 for name in self.op.osparams.keys():
8745 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8746 del self.op.osparams[name]
8748 def _CalculateFileStorageDir(self):
8749 """Calculate final instance file storage dir.
8752 # file storage dir calculation/check
8753 self.instance_file_storage_dir = None
8754 if self.op.disk_template in constants.DTS_FILEBASED:
8755 # build the full file storage dir path
8758 if self.op.disk_template == constants.DT_SHARED_FILE:
8759 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8761 get_fsd_fn = self.cfg.GetFileStorageDir
8763 cfg_storagedir = get_fsd_fn()
8764 if not cfg_storagedir:
8765 raise errors.OpPrereqError("Cluster file storage dir not defined")
8766 joinargs.append(cfg_storagedir)
8768 if self.op.file_storage_dir is not None:
8769 joinargs.append(self.op.file_storage_dir)
8771 joinargs.append(self.op.instance_name)
8773 # pylint: disable=W0142
8774 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
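# Illustrative result (hypothetical paths): with a cluster file storage dir of
# "/srv/ganeti/file-storage", an op.file_storage_dir of "group1" and instance
# name "web1", the final dir is "/srv/ganeti/file-storage/group1/web1".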
8776 def CheckPrereq(self):
8777 """Check prerequisites.
8780 self._CalculateFileStorageDir()
8782 if self.op.mode == constants.INSTANCE_IMPORT:
8783 export_info = self._ReadExportInfo()
8784 self._ReadExportParams(export_info)
8786 if (not self.cfg.GetVGName() and
8787 self.op.disk_template not in constants.DTS_NOT_LVM):
8788 raise errors.OpPrereqError("Cluster does not support lvm-based"
8789 " instances", errors.ECODE_STATE)
8791 if (self.op.hypervisor is None or
8792 self.op.hypervisor == constants.VALUE_AUTO):
8793 self.op.hypervisor = self.cfg.GetHypervisorType()
8795 cluster = self.cfg.GetClusterInfo()
8796 enabled_hvs = cluster.enabled_hypervisors
8797 if self.op.hypervisor not in enabled_hvs:
8798 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8799 " cluster (%s)" % (self.op.hypervisor,
8800 ",".join(enabled_hvs)),
8803 # Check tag validity
8804 for tag in self.op.tags:
8805 objects.TaggableObject.ValidateTag(tag)
8807 # check hypervisor parameter syntax (locally)
8808 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8809 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8811 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8812 hv_type.CheckParameterSyntax(filled_hvp)
8813 self.hv_full = filled_hvp
8814 # check that we don't specify global parameters on an instance
8815 _CheckGlobalHvParams(self.op.hvparams)
8817 # fill and remember the beparams dict
8818 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8819 for param, value in self.op.beparams.iteritems():
8820 if value == constants.VALUE_AUTO:
8821 self.op.beparams[param] = default_beparams[param]
8822 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8823 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8825 # build os parameters
8826 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8828 # now that hvp/bep are in final format, let's reset to defaults,
8830 if self.op.identify_defaults:
8831 self._RevertToDefaults(cluster)
8835 for idx, nic in enumerate(self.op.nics):
8836 nic_mode_req = nic.get(constants.INIC_MODE, None)
8837 nic_mode = nic_mode_req
8838 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8839 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8841 # in routed mode, for the first nic, the default ip is 'auto'
8842 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8843 default_ip_mode = constants.VALUE_AUTO
8845 default_ip_mode = constants.VALUE_NONE
8847 # ip validity checks
8848 ip = nic.get(constants.INIC_IP, default_ip_mode)
8849 if ip is None or ip.lower() == constants.VALUE_NONE:
8851 elif ip.lower() == constants.VALUE_AUTO:
8852 if not self.op.name_check:
8853 raise errors.OpPrereqError("IP address set to auto but name checks"
8854 " have been skipped",
8856 nic_ip = self.hostname1.ip
8858 if not netutils.IPAddress.IsValid(ip):
8859 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8863 # TODO: check the ip address for uniqueness
8864 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8865 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8868 # MAC address verification
8869 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8870 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8871 mac = utils.NormalizeAndValidateMac(mac)
8874 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8875 except errors.ReservationError:
8876 raise errors.OpPrereqError("MAC address %s already in use"
8877 " in cluster" % mac,
8878 errors.ECODE_NOTUNIQUE)
8880 # Build nic parameters
8881 link = nic.get(constants.INIC_LINK, None)
8882 if link == constants.VALUE_AUTO:
8883 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8886 nicparams[constants.NIC_MODE] = nic_mode
8888 nicparams[constants.NIC_LINK] = link
8890 check_params = cluster.SimpleFillNIC(nicparams)
8891 objects.NIC.CheckParameterSyntax(check_params)
8892 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
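# Illustrative outcome (hypothetical values): requesting a NIC with
# {INIC_MODE: "bridged", INIC_LINK: "br0"} stores just those two overrides in
# the NIC's nicparams; SimpleFillNIC above merges them with the cluster
# defaults only for the syntax check.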
8894 # disk checks/pre-build
8895 default_vg = self.cfg.GetVGName()
8897 for disk in self.op.disks:
8898 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8899 if mode not in constants.DISK_ACCESS_SET:
8900 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8901 mode, errors.ECODE_INVAL)
8902 size = disk.get(constants.IDISK_SIZE, None)
8904 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8907 except (TypeError, ValueError):
8908 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8911 data_vg = disk.get(constants.IDISK_VG, default_vg)
8913 constants.IDISK_SIZE: size,
8914 constants.IDISK_MODE: mode,
8915 constants.IDISK_VG: data_vg,
8916 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8918 if constants.IDISK_ADOPT in disk:
8919 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8920 self.disks.append(new_disk)
8922 if self.op.mode == constants.INSTANCE_IMPORT:
8924 for idx in range(len(self.disks)):
8925 option = "disk%d_dump" % idx
8926 if export_info.has_option(constants.INISECT_INS, option):
8927 # FIXME: are the old os-es, disk sizes, etc. useful?
8928 export_name = export_info.get(constants.INISECT_INS, option)
8929 image = utils.PathJoin(self.op.src_path, export_name)
8930 disk_images.append(image)
8932 disk_images.append(False)
8934 self.src_images = disk_images
8936 old_name = export_info.get(constants.INISECT_INS, "name")
8937 if self.op.instance_name == old_name:
8938 for idx, nic in enumerate(self.nics):
8939 if nic.mac == constants.VALUE_AUTO:
8940 nic_mac_ini = "nic%d_mac" % idx
8941 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8943 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8945 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8946 if self.op.ip_check:
8947 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8948 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8949 (self.check_ip, self.op.instance_name),
8950 errors.ECODE_NOTUNIQUE)
8952 #### mac address generation
8953 # By generating here the mac address both the allocator and the hooks get
8954 # the real final mac address rather than the 'auto' or 'generate' value.
8955 # There is a race condition between the generation and the instance object
8956 # creation, which means that we know the mac is valid now, but we're not
8957 # sure it will be when we actually add the instance. If things go bad
8958 # adding the instance will abort because of a duplicate mac, and the
8959 # creation job will fail.
8960 for nic in self.nics:
8961 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8962 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8966 if self.op.iallocator is not None:
8967 self._RunAllocator()
8969 #### node related checks
8971 # check primary node
8972 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8973 assert self.pnode is not None, \
8974 "Cannot retrieve locked node %s" % self.op.pnode
8976 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8977 pnode.name, errors.ECODE_STATE)
8979 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8980 pnode.name, errors.ECODE_STATE)
8981 if not pnode.vm_capable:
8982 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8983 " '%s'" % pnode.name, errors.ECODE_STATE)
8985 self.secondaries = []
8987 # mirror node verification
8988 if self.op.disk_template in constants.DTS_INT_MIRROR:
8989 if self.op.snode == pnode.name:
8990 raise errors.OpPrereqError("The secondary node cannot be the"
8991 " primary node", errors.ECODE_INVAL)
8992 _CheckNodeOnline(self, self.op.snode)
8993 _CheckNodeNotDrained(self, self.op.snode)
8994 _CheckNodeVmCapable(self, self.op.snode)
8995 self.secondaries.append(self.op.snode)
8997 nodenames = [pnode.name] + self.secondaries
8999 if not self.adopt_disks:
9000 # Check lv size requirements, if not adopting
9001 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9002 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9004 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9005 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9006 disk[constants.IDISK_ADOPT])
9007 for disk in self.disks])
9008 if len(all_lvs) != len(self.disks):
9009 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9011 for lv_name in all_lvs:
9013 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
9014 # to ReserveLV use the same syntax
9015 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9016 except errors.ReservationError:
9017 raise errors.OpPrereqError("LV named %s used by another instance" %
9018 lv_name, errors.ECODE_NOTUNIQUE)
9020 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9021 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9023 node_lvs = self.rpc.call_lv_list([pnode.name],
9024 vg_names.payload.keys())[pnode.name]
9025 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9026 node_lvs = node_lvs.payload
9028 delta = all_lvs.difference(node_lvs.keys())
9030 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9031 utils.CommaJoin(delta),
9033 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9035 raise errors.OpPrereqError("Online logical volumes found, cannot"
9036 " adopt: %s" % utils.CommaJoin(online_lvs),
9038 # update the size of disk based on what is found
9039 for dsk in self.disks:
9040 dsk[constants.IDISK_SIZE] = \
9041 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9042 dsk[constants.IDISK_ADOPT])][0]))
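# e.g. (illustrative): an adopted LV whose reported size is 10240.25 MiB ends
# up with IDISK_SIZE == 10240, since int(float(...)) truncates the value.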
9044 elif self.op.disk_template == constants.DT_BLOCK:
9045 # Normalize and de-duplicate device paths
9046 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9047 for disk in self.disks])
9048 if len(all_disks) != len(self.disks):
9049 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9051 baddisks = [d for d in all_disks
9052 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9054 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9055 " cannot be adopted" %
9056 (", ".join(baddisks),
9057 constants.ADOPTABLE_BLOCKDEV_ROOT),
9060 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9061 list(all_disks))[pnode.name]
9062 node_disks.Raise("Cannot get block device information from node %s" %
9064 node_disks = node_disks.payload
9065 delta = all_disks.difference(node_disks.keys())
9067 raise errors.OpPrereqError("Missing block device(s): %s" %
9068 utils.CommaJoin(delta),
9070 for dsk in self.disks:
9071 dsk[constants.IDISK_SIZE] = \
9072 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9074 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9076 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9077 # check OS parameters (remotely)
9078 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9080 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9082 # memory check on primary node
9084 _CheckNodeFreeMemory(self, self.pnode.name,
9085 "creating instance %s" % self.op.instance_name,
9086 self.be_full[constants.BE_MEMORY],
9089 self.dry_run_result = list(nodenames)
9091 def Exec(self, feedback_fn):
9092 """Create and add the instance to the cluster.
9095 instance = self.op.instance_name
9096 pnode_name = self.pnode.name
9098 ht_kind = self.op.hypervisor
9099 if ht_kind in constants.HTS_REQ_PORT:
9100 network_port = self.cfg.AllocatePort()
9104 disks = _GenerateDiskTemplate(self,
9105 self.op.disk_template,
9106 instance, pnode_name,
9109 self.instance_file_storage_dir,
9110 self.op.file_driver,
9114 iobj = objects.Instance(name=instance, os=self.op.os_type,
9115 primary_node=pnode_name,
9116 nics=self.nics, disks=disks,
9117 disk_template=self.op.disk_template,
9119 network_port=network_port,
9120 beparams=self.op.beparams,
9121 hvparams=self.op.hvparams,
9122 hypervisor=self.op.hypervisor,
9123 osparams=self.op.osparams,
9127 for tag in self.op.tags:
9130 if self.adopt_disks:
9131 if self.op.disk_template == constants.DT_PLAIN:
9132 # rename LVs to the newly-generated names; we need to construct
9133 # 'fake' LV disks with the old data, plus the new unique_id
9134 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9136 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9137 rename_to.append(t_dsk.logical_id)
9138 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9139 self.cfg.SetDiskID(t_dsk, pnode_name)
9140 result = self.rpc.call_blockdev_rename(pnode_name,
9141 zip(tmp_disks, rename_to))
9142 result.Raise("Failed to rename adopted LVs")
9144 feedback_fn("* creating instance disks...")
9146 _CreateDisks(self, iobj)
9147 except errors.OpExecError:
9148 self.LogWarning("Device creation failed, reverting...")
9150 _RemoveDisks(self, iobj)
9152 self.cfg.ReleaseDRBDMinors(instance)
9155 feedback_fn("adding instance %s to cluster config" % instance)
9157 self.cfg.AddInstance(iobj, self.proc.GetECId())
9159 # Declare that we don't want to remove the instance lock anymore, as we've
9160 # added the instance to the config
9161 del self.remove_locks[locking.LEVEL_INSTANCE]
9163 if self.op.mode == constants.INSTANCE_IMPORT:
9164 # Release unused nodes
9165 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9168 _ReleaseLocks(self, locking.LEVEL_NODE)
9171 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9172 feedback_fn("* wiping instance disks...")
9174 _WipeDisks(self, iobj)
9175 except errors.OpExecError, err:
9176 logging.exception("Wiping disks failed")
9177 self.LogWarning("Wiping instance disks failed (%s)", err)
9181 # Something is already wrong with the disks, don't do anything else
9183 elif self.op.wait_for_sync:
9184 disk_abort = not _WaitForSync(self, iobj)
9185 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9186 # make sure the disks are not degraded (still sync-ing is ok)
9187 feedback_fn("* checking mirrors status")
9188 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9193 _RemoveDisks(self, iobj)
9194 self.cfg.RemoveInstance(iobj.name)
9195 # Make sure the instance lock gets removed
9196 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9197 raise errors.OpExecError("There are some degraded disks for"
9200 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9201 if self.op.mode == constants.INSTANCE_CREATE:
9202 if not self.op.no_install:
9203 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9204 not self.op.wait_for_sync)
9206 feedback_fn("* pausing disk sync to install instance OS")
9207 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9209 for idx, success in enumerate(result.payload):
9211 logging.warn("pause-sync of instance %s for disk %d failed",
9214 feedback_fn("* running the instance OS create scripts...")
9215 # FIXME: pass debug option from opcode to backend
9217 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9218 self.op.debug_level)
9220 feedback_fn("* resuming disk sync")
9221 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9223 for idx, success in enumerate(result.payload):
9225 logging.warn("resume-sync of instance %s for disk %d failed",
9228 os_add_result.Raise("Could not add os for instance %s"
9229 " on node %s" % (instance, pnode_name))
9231 elif self.op.mode == constants.INSTANCE_IMPORT:
9232 feedback_fn("* running the instance OS import scripts...")
9236 for idx, image in enumerate(self.src_images):
9240 # FIXME: pass debug option from opcode to backend
9241 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9242 constants.IEIO_FILE, (image, ),
9243 constants.IEIO_SCRIPT,
9244 (iobj.disks[idx], idx),
9246 transfers.append(dt)
9249 masterd.instance.TransferInstanceData(self, feedback_fn,
9250 self.op.src_node, pnode_name,
9251 self.pnode.secondary_ip,
9253 if not compat.all(import_result):
9254 self.LogWarning("Some disks for instance %s on node %s were not"
9255 " imported successfully" % (instance, pnode_name))
9257 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9258 feedback_fn("* preparing remote import...")
9259 # The source cluster will stop the instance before attempting to make a
9260 # connection. In some cases stopping an instance can take a long time,
9261 # hence the shutdown timeout is added to the connection timeout.
9262 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9263 self.op.source_shutdown_timeout)
9264 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9266 assert iobj.primary_node == self.pnode.name
9268 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9269 self.source_x509_ca,
9270 self._cds, timeouts)
9271 if not compat.all(disk_results):
9272 # TODO: Should the instance still be started, even if some disks
9273 # failed to import (valid for local imports, too)?
9274 self.LogWarning("Some disks for instance %s on node %s were not"
9275 " imported successfully" % (instance, pnode_name))
9277 # Run rename script on newly imported instance
9278 assert iobj.name == instance
9279 feedback_fn("Running rename script for %s" % instance)
9280 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9281 self.source_instance_name,
9282 self.op.debug_level)
9284 self.LogWarning("Failed to run rename script for %s on node"
9285 " %s: %s" % (instance, pnode_name, result.fail_msg))
9288 # also checked in the prereq part
9289 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9293 iobj.admin_up = True
9294 self.cfg.Update(iobj, feedback_fn)
9295 logging.info("Starting instance %s on node %s", instance, pnode_name)
9296 feedback_fn("* starting instance...")
9297 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9299 result.Raise("Could not start instance")
9301 return list(iobj.all_nodes)
9304 class LUInstanceConsole(NoHooksLU):
9305 """Connect to an instance's console.
9307 This is somewhat special in that it returns the command line that
9308 you need to run on the master node in order to connect to the
9314 def ExpandNames(self):
9315 self._ExpandAndLockInstance()
9317 def CheckPrereq(self):
9318 """Check prerequisites.
9320 This checks that the instance is in the cluster.
9323 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9324 assert self.instance is not None, \
9325 "Cannot retrieve locked instance %s" % self.op.instance_name
9326 _CheckNodeOnline(self, self.instance.primary_node)
9328 def Exec(self, feedback_fn):
9329 """Connect to the console of an instance
9332 instance = self.instance
9333 node = instance.primary_node
9335 node_insts = self.rpc.call_instance_list([node],
9336 [instance.hypervisor])[node]
9337 node_insts.Raise("Can't get node information from %s" % node)
9339 if instance.name not in node_insts.payload:
9340 if instance.admin_up:
9341 state = constants.INSTST_ERRORDOWN
9343 state = constants.INSTST_ADMINDOWN
9344 raise errors.OpExecError("Instance %s is not running (state %s)" %
9345 (instance.name, state))
9347 logging.debug("Connecting to console of %s on %s", instance.name, node)
9349 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9352 def _GetInstanceConsole(cluster, instance):
9353 """Returns console information for an instance.
9355 @type cluster: L{objects.Cluster}
9356 @type instance: L{objects.Instance}
9360 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9361 # beparams and hvparams are passed separately, to avoid editing the
9362 # instance and then saving the defaults in the instance itself.
9363 hvparams = cluster.FillHV(instance)
9364 beparams = cluster.FillBE(instance)
9365 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9367 assert console.instance == instance.name
9368 assert console.Validate()
9370 return console.ToDict()
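# Note for readers (illustrative only, not part of the original module): the
# dict returned above is the serialized console object; for an SSH-based
# console it would look roughly like
#   {"instance": "inst1.example.com", "kind": "ssh", "host": "node1", ...}
# The exact keys depend on the hypervisor's GetInstanceConsole implementation.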
9373 class LUInstanceReplaceDisks(LogicalUnit):
9374 """Replace the disks of an instance.
9377 HPATH = "mirrors-replace"
9378 HTYPE = constants.HTYPE_INSTANCE
9381 def CheckArguments(self):
9382 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9385 def ExpandNames(self):
9386 self._ExpandAndLockInstance()
9388 assert locking.LEVEL_NODE not in self.needed_locks
9389 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9391 assert self.op.iallocator is None or self.op.remote_node is None, \
9392 "Conflicting options"
9394 if self.op.remote_node is not None:
9395 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9397 # Warning: do not remove the locking of the new secondary here
9398 # unless DRBD8.AddChildren is changed to work in parallel;
9399 # currently it doesn't since parallel invocations of
9400 # FindUnusedMinor will conflict
9401 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9402 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9404 self.needed_locks[locking.LEVEL_NODE] = []
9405 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9407 if self.op.iallocator is not None:
9408 # iallocator will select a new node in the same group
9409 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9411 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9412 self.op.iallocator, self.op.remote_node,
9413 self.op.disks, False, self.op.early_release)
9415 self.tasklets = [self.replacer]
9417 def DeclareLocks(self, level):
9418 if level == locking.LEVEL_NODEGROUP:
9419 assert self.op.remote_node is None
9420 assert self.op.iallocator is not None
9421 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9423 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9424 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9425 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9427 elif level == locking.LEVEL_NODE:
9428 if self.op.iallocator is not None:
9429 assert self.op.remote_node is None
9430 assert not self.needed_locks[locking.LEVEL_NODE]
9432 # Lock member nodes of all locked groups
9433 self.needed_locks[locking.LEVEL_NODE] = [node_name
9434 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9435 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9437 self._LockInstancesNodes()
9439 def BuildHooksEnv(self):
9442 This runs on the master, the primary and all the secondaries.
9445 instance = self.replacer.instance
9447 "MODE": self.op.mode,
9448 "NEW_SECONDARY": self.op.remote_node,
9449 "OLD_SECONDARY": instance.secondary_nodes[0],
9451 env.update(_BuildInstanceHookEnvByObject(self, instance))
9454 def BuildHooksNodes(self):
9455 """Build hooks nodes.
9458 instance = self.replacer.instance
9460 self.cfg.GetMasterNode(),
9461 instance.primary_node,
9463 if self.op.remote_node is not None:
9464 nl.append(self.op.remote_node)
9467 def CheckPrereq(self):
9468 """Check prerequisites.
9471 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9472 self.op.iallocator is None)
9474 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9476 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9478 return LogicalUnit.CheckPrereq(self)
9481 class TLReplaceDisks(Tasklet):
9482 """Replaces disks for an instance.
9484 Note: Locking is not within the scope of this class.
9487 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9488 disks, delay_iallocator, early_release):
9489 """Initializes this class.
9492 Tasklet.__init__(self, lu)
9495 self.instance_name = instance_name
9497 self.iallocator_name = iallocator_name
9498 self.remote_node = remote_node
9500 self.delay_iallocator = delay_iallocator
9501 self.early_release = early_release
9504 self.instance = None
9505 self.new_node = None
9506 self.target_node = None
9507 self.other_node = None
9508 self.remote_node_info = None
9509 self.node_secondary_ip = None
9512 def CheckArguments(mode, remote_node, iallocator):
9513 """Helper function for users of this class.
9516 # check for valid parameter combination
9517 if mode == constants.REPLACE_DISK_CHG:
9518 if remote_node is None and iallocator is None:
9519 raise errors.OpPrereqError("When changing the secondary either an"
9520 " iallocator script must be used or the"
9521 " new node given", errors.ECODE_INVAL)
9523 if remote_node is not None and iallocator is not None:
9524 raise errors.OpPrereqError("Give either the iallocator or the new"
9525 " secondary, not both", errors.ECODE_INVAL)
9527 elif remote_node is not None or iallocator is not None:
9528 # Not replacing the secondary
9529 raise errors.OpPrereqError("The iallocator and new node options can"
9530 " only be used when changing the"
9531 " secondary node", errors.ECODE_INVAL)
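# Illustration of the accepted combinations (hypothetical node/script names):
# when changing the secondary, exactly one of the two options must be given,
#   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, "node3", None)
#   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")
# while passing both, or neither, raises OpPrereqError; for the other modes
# both options must be left unset.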
9534 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9535 """Compute a new secondary node using an IAllocator.
9538 ial = IAllocator(lu.cfg, lu.rpc,
9539 mode=constants.IALLOCATOR_MODE_RELOC,
9541 relocate_from=list(relocate_from))
9543 ial.Run(iallocator_name)
9546 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9547 " %s" % (iallocator_name, ial.info),
9550 if len(ial.result) != ial.required_nodes:
9551 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9552 " of nodes (%s), required %s" %
9554 len(ial.result), ial.required_nodes),
9557 remote_node_name = ial.result[0]
9559 lu.LogInfo("Selected new secondary for instance '%s': %s",
9560 instance_name, remote_node_name)
9562 return remote_node_name
9564 def _FindFaultyDisks(self, node_name):
9565 """Wrapper for L{_FindFaultyInstanceDisks}.
9568 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9571 def _CheckDisksActivated(self, instance):
9572 """Checks if the instance disks are activated.
9574 @param instance: The instance to check disks
9575 @return: True if they are activated, False otherwise
9578 nodes = instance.all_nodes
9580 for idx, dev in enumerate(instance.disks):
9582 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9583 self.cfg.SetDiskID(dev, node)
9585 result = self.rpc.call_blockdev_find(node, dev)
9589 elif result.fail_msg or not result.payload:
9594 def CheckPrereq(self):
9595 """Check prerequisites.
9597 This checks that the instance is in the cluster.
9600 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9601 assert instance is not None, \
9602 "Cannot retrieve locked instance %s" % self.instance_name
9604 if instance.disk_template != constants.DT_DRBD8:
9605 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9606 " instances", errors.ECODE_INVAL)
9608 if len(instance.secondary_nodes) != 1:
9609 raise errors.OpPrereqError("The instance has a strange layout,"
9610 " expected one secondary but found %d" %
9611 len(instance.secondary_nodes),
9614 if not self.delay_iallocator:
9615 self._CheckPrereq2()
9617 def _CheckPrereq2(self):
9618 """Check prerequisites, second part.
9620 This function should always be part of CheckPrereq. It was separated and is
9621 now called from Exec because during node evacuation iallocator was only
9622 called with an unmodified cluster model, not taking planned changes into account.
9626 instance = self.instance
9627 secondary_node = instance.secondary_nodes[0]
9629 if self.iallocator_name is None:
9630 remote_node = self.remote_node
9632 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9633 instance.name, instance.secondary_nodes)
9635 if remote_node is None:
9636 self.remote_node_info = None
9638 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9639 "Remote node '%s' is not locked" % remote_node
9641 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9642 assert self.remote_node_info is not None, \
9643 "Cannot retrieve locked node %s" % remote_node
9645 if remote_node == self.instance.primary_node:
9646 raise errors.OpPrereqError("The specified node is the primary node of"
9647 " the instance", errors.ECODE_INVAL)
9649 if remote_node == secondary_node:
9650 raise errors.OpPrereqError("The specified node is already the"
9651 " secondary node of the instance",
9654 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9655 constants.REPLACE_DISK_CHG):
9656 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9659 if self.mode == constants.REPLACE_DISK_AUTO:
9660 if not self._CheckDisksActivated(instance):
9661 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9662 " first" % self.instance_name,
9664 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9665 faulty_secondary = self._FindFaultyDisks(secondary_node)
9667 if faulty_primary and faulty_secondary:
9668 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9669 " one node and can not be repaired"
9670 " automatically" % self.instance_name,
9674 self.disks = faulty_primary
9675 self.target_node = instance.primary_node
9676 self.other_node = secondary_node
9677 check_nodes = [self.target_node, self.other_node]
9678 elif faulty_secondary:
9679 self.disks = faulty_secondary
9680 self.target_node = secondary_node
9681 self.other_node = instance.primary_node
9682 check_nodes = [self.target_node, self.other_node]
9688 # Non-automatic modes
9689 if self.mode == constants.REPLACE_DISK_PRI:
9690 self.target_node = instance.primary_node
9691 self.other_node = secondary_node
9692 check_nodes = [self.target_node, self.other_node]
9694 elif self.mode == constants.REPLACE_DISK_SEC:
9695 self.target_node = secondary_node
9696 self.other_node = instance.primary_node
9697 check_nodes = [self.target_node, self.other_node]
9699 elif self.mode == constants.REPLACE_DISK_CHG:
9700 self.new_node = remote_node
9701 self.other_node = instance.primary_node
9702 self.target_node = secondary_node
9703 check_nodes = [self.new_node, self.other_node]
9705 _CheckNodeNotDrained(self.lu, remote_node)
9706 _CheckNodeVmCapable(self.lu, remote_node)
9708 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9709 assert old_node_info is not None
9710 if old_node_info.offline and not self.early_release:
9711 # doesn't make sense to delay the release
9712 self.early_release = True
9713 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9714 " early-release mode", secondary_node)
9717 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9720 # If not specified all disks should be replaced
9722 self.disks = range(len(self.instance.disks))
9724 for node in check_nodes:
9725 _CheckNodeOnline(self.lu, node)
9727 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9730 if node_name is not None)
9732 # Release unneeded node locks
9733 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9735 # Release any owned node group
9736 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9737 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9739 # Check whether disks are valid
9740 for disk_idx in self.disks:
9741 instance.FindDisk(disk_idx)
9743 # Get secondary node IP addresses
9744 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9745 in self.cfg.GetMultiNodeInfo(touched_nodes))
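# Sketch of the resulting mapping (addresses hypothetical):
#   self.node_secondary_ip == {"node1": "192.0.2.1", "node4": "192.0.2.4"}
# i.e. the secondary (replication) IP of every node touched by the replace.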
9747 def Exec(self, feedback_fn):
9748 """Execute disk replacement.
9750 This dispatches the disk replacement to the appropriate handler.
9753 if self.delay_iallocator:
9754 self._CheckPrereq2()
9757 # Verify owned locks before starting operation
9758 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9759 assert set(owned_nodes) == set(self.node_secondary_ip), \
9760 ("Incorrect node locks, owning %s, expected %s" %
9761 (owned_nodes, self.node_secondary_ip.keys()))
9763 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9764 assert list(owned_instances) == [self.instance_name], \
9765 "Instance '%s' not locked" % self.instance_name
9767 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9768 "Should not own any node group lock at this point"
9771 feedback_fn("No disks need replacement")
9774 feedback_fn("Replacing disk(s) %s for %s" %
9775 (utils.CommaJoin(self.disks), self.instance.name))
9777 activate_disks = (not self.instance.admin_up)
9779 # Activate the instance disks if we're replacing them on a down instance
9781 _StartInstanceDisks(self.lu, self.instance, True)
9784 # Should we replace the secondary node?
9785 if self.new_node is not None:
9786 fn = self._ExecDrbd8Secondary
9788 fn = self._ExecDrbd8DiskOnly
9790 result = fn(feedback_fn)
9792 # Deactivate the instance disks if we're replacing them on a
9795 _SafeShutdownInstanceDisks(self.lu, self.instance)
9798 # Verify owned locks
9799 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9800 nodes = frozenset(self.node_secondary_ip)
9801 assert ((self.early_release and not owned_nodes) or
9802 (not self.early_release and not (set(owned_nodes) - nodes))), \
9803 ("Not owning the correct locks, early_release=%s, owned=%r,"
9804 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9808 def _CheckVolumeGroup(self, nodes):
9809 self.lu.LogInfo("Checking volume groups")
9811 vgname = self.cfg.GetVGName()
9813 # Make sure volume group exists on all involved nodes
9814 results = self.rpc.call_vg_list(nodes)
9816 raise errors.OpExecError("Can't list volume groups on the nodes")
9820 res.Raise("Error checking node %s" % node)
9821 if vgname not in res.payload:
9822 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9825 def _CheckDisksExistence(self, nodes):
9826 # Check disk existence
9827 for idx, dev in enumerate(self.instance.disks):
9828 if idx not in self.disks:
9832 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9833 self.cfg.SetDiskID(dev, node)
9835 result = self.rpc.call_blockdev_find(node, dev)
9837 msg = result.fail_msg
9838 if msg or not result.payload:
9840 msg = "disk not found"
9841 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9844 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9845 for idx, dev in enumerate(self.instance.disks):
9846 if idx not in self.disks:
9849 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9852 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9854 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9855 " replace disks for instance %s" %
9856 (node_name, self.instance.name))
9858 def _CreateNewStorage(self, node_name):
9859 """Create new storage on the primary or secondary node.
9861 This is only used for same-node replaces, not for changing the
9862 secondary node, hence we don't want to modify the existing disk.
9867 for idx, dev in enumerate(self.instance.disks):
9868 if idx not in self.disks:
9871 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9873 self.cfg.SetDiskID(dev, node_name)
9875 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9876 names = _GenerateUniqueNames(self.lu, lv_names)
9878 vg_data = dev.children[0].logical_id[0]
9879 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9880 logical_id=(vg_data, names[0]))
9881 vg_meta = dev.children[1].logical_id[0]
9882 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9883 logical_id=(vg_meta, names[1]))
9885 new_lvs = [lv_data, lv_meta]
9886 old_lvs = [child.Copy() for child in dev.children]
9887 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9889 # we pass force_create=True to force the LVM creation
9890 for new_lv in new_lvs:
9891 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9892 _GetInstanceInfoText(self.instance), False)
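# Rough sketch of the mapping built above (values illustrative): per DRBD
# device we remember the device object plus its old and new backing LV pairs,
#   iv_names == {"disk/0": (drbd_dev, [old_data_lv, old_meta_lv],
#                           [new_data_lv, new_meta_lv]), ...}
# which _CheckDevices and _RemoveOldStorage consume later.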
9896 def _CheckDevices(self, node_name, iv_names):
9897 for name, (dev, _, _) in iv_names.iteritems():
9898 self.cfg.SetDiskID(dev, node_name)
9900 result = self.rpc.call_blockdev_find(node_name, dev)
9902 msg = result.fail_msg
9903 if msg or not result.payload:
9905 msg = "disk not found"
9906 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9909 if result.payload.is_degraded:
9910 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9912 def _RemoveOldStorage(self, node_name, iv_names):
9913 for name, (_, old_lvs, _) in iv_names.iteritems():
9914 self.lu.LogInfo("Remove logical volumes for %s" % name)
9917 self.cfg.SetDiskID(lv, node_name)
9919 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9921 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9922 hint="remove unused LVs manually")
9924 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9925 """Replace a disk on the primary or secondary for DRBD 8.
9927 The algorithm for replace is quite complicated:
9929 1. for each disk to be replaced:
9931 1. create new LVs on the target node with unique names
9932 1. detach old LVs from the drbd device
9933 1. rename old LVs to name_replaced.<time_t>
9934 1. rename new LVs to old LVs
9935 1. attach the new LVs (with the old names now) to the drbd device
9937 1. wait for sync across all devices
9939 1. for each modified disk:
9941 1. remove old LVs (which have the name name_replaced.<time_t>)
9943 Failures are not very well handled.
9948 # Step: check device activation
9949 self.lu.LogStep(1, steps_total, "Check device existence")
9950 self._CheckDisksExistence([self.other_node, self.target_node])
9951 self._CheckVolumeGroup([self.target_node, self.other_node])
9953 # Step: check other node consistency
9954 self.lu.LogStep(2, steps_total, "Check peer consistency")
9955 self._CheckDisksConsistency(self.other_node,
9956 self.other_node == self.instance.primary_node,
9959 # Step: create new storage
9960 self.lu.LogStep(3, steps_total, "Allocate new storage")
9961 iv_names = self._CreateNewStorage(self.target_node)
9963 # Step: for each lv, detach+rename*2+attach
9964 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9965 for dev, old_lvs, new_lvs in iv_names.itervalues():
9966 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9968 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9970 result.Raise("Can't detach drbd from local storage on node"
9971 " %s for device %s" % (self.target_node, dev.iv_name))
9973 #cfg.Update(instance)
9975 # ok, we created the new LVs, so now we know we have the needed
9976 # storage; as such, we proceed on the target node to rename
9977 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9978 # using the assumption that logical_id == physical_id (which in
9979 # turn is the unique_id on that node)
9981 # FIXME(iustin): use a better name for the replaced LVs
9982 temp_suffix = int(time.time())
9983 ren_fn = lambda d, suff: (d.physical_id[0],
9984 d.physical_id[1] + "_replaced-%s" % suff)
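# For illustration (hypothetical values): with temp_suffix == 1400000000 and
# an LV whose physical_id is ("xenvg", ".disk0_data"), ren_fn yields
#   ("xenvg", ".disk0_data_replaced-1400000000")
# i.e. the old LV keeps its volume group but gets a unique, timestamped name.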
9986 # Build the rename list based on what LVs exist on the node
9987 rename_old_to_new = []
9988 for to_ren in old_lvs:
9989 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9990 if not result.fail_msg and result.payload:
9992 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9994 self.lu.LogInfo("Renaming the old LVs on the target node")
9995 result = self.rpc.call_blockdev_rename(self.target_node,
9997 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9999 # Now we rename the new LVs to the old LVs
10000 self.lu.LogInfo("Renaming the new LVs on the target node")
10001 rename_new_to_old = [(new, old.physical_id)
10002 for old, new in zip(old_lvs, new_lvs)]
10003 result = self.rpc.call_blockdev_rename(self.target_node,
10005 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10007 # Intermediate steps of in memory modifications
10008 for old, new in zip(old_lvs, new_lvs):
10009 new.logical_id = old.logical_id
10010 self.cfg.SetDiskID(new, self.target_node)
10012 # We need to modify old_lvs so that removal later removes the
10013 # right LVs, not the newly added ones; note that old_lvs is a
10015 for disk in old_lvs:
10016 disk.logical_id = ren_fn(disk, temp_suffix)
10017 self.cfg.SetDiskID(disk, self.target_node)
10019 # Now that the new lvs have the old name, we can add them to the device
10020 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10021 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10023 msg = result.fail_msg
10025 for new_lv in new_lvs:
10026 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10029 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10030 hint=("cleanup manually the unused logical"
10032 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10035 if self.early_release:
10036 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10038 self._RemoveOldStorage(self.target_node, iv_names)
10039 # WARNING: we release both node locks here, do not do other RPCs
10040 # than WaitForSync to the primary node
10041 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10042 names=[self.target_node, self.other_node])
10045 # This can fail as the old devices are degraded and _WaitForSync
10046 # does a combined result over all disks, so we don't check its return value
10047 self.lu.LogStep(cstep, steps_total, "Sync devices")
10049 _WaitForSync(self.lu, self.instance)
10051 # Check all devices manually
10052 self._CheckDevices(self.instance.primary_node, iv_names)
10054 # Step: remove old storage
10055 if not self.early_release:
10056 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10058 self._RemoveOldStorage(self.target_node, iv_names)
10060 def _ExecDrbd8Secondary(self, feedback_fn):
10061 """Replace the secondary node for DRBD 8.
10063 The algorithm for replace is quite complicated:
10064 - for all disks of the instance:
10065 - create new LVs on the new node with same names
10066 - shutdown the drbd device on the old secondary
10067 - disconnect the drbd network on the primary
10068 - create the drbd device on the new secondary
10069 - network attach the drbd on the primary, using an artifice:
10070 the drbd code for Attach() will connect to the network if it
10071 finds a device which is connected to the good local disks but
10072 not network enabled
10073 - wait for sync across all devices
10074 - remove all disks from the old secondary
10076 Failures are not very well handled.
10081 pnode = self.instance.primary_node
10083 # Step: check device activation
10084 self.lu.LogStep(1, steps_total, "Check device existence")
10085 self._CheckDisksExistence([self.instance.primary_node])
10086 self._CheckVolumeGroup([self.instance.primary_node])
10088 # Step: check other node consistency
10089 self.lu.LogStep(2, steps_total, "Check peer consistency")
10090 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10092 # Step: create new storage
10093 self.lu.LogStep(3, steps_total, "Allocate new storage")
10094 for idx, dev in enumerate(self.instance.disks):
10095 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10096 (self.new_node, idx))
10097 # we pass force_create=True to force LVM creation
10098 for new_lv in dev.children:
10099 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10100 _GetInstanceInfoText(self.instance), False)
10102 # Step 4: drbd minors and drbd setup changes
10103 # after this, we must manually remove the drbd minors on both the
10104 # error and the success paths
10105 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10106 minors = self.cfg.AllocateDRBDMinor([self.new_node
10107 for dev in self.instance.disks],
10108 self.instance.name)
10109 logging.debug("Allocated minors %r", minors)
10112 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10113 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10114 (self.new_node, idx))
10115 # create new devices on new_node; note that we create two IDs:
10116 # one without port, so the drbd will be activated without
10117 # networking information on the new node at this stage, and one
10118 # with network, for the latter activation in step 4
10119 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10120 if self.instance.primary_node == o_node1:
10123 assert self.instance.primary_node == o_node2, "Three-node instance?"
10126 new_alone_id = (self.instance.primary_node, self.new_node, None,
10127 p_minor, new_minor, o_secret)
10128 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10129 p_minor, new_minor, o_secret)
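# Illustrative example (hypothetical values): for a primary with minor 0, a
# newly allocated minor 3 on the new node, DRBD port 11000 and shared secret
# "s3cr3t", the two IDs would be
#   new_alone_id == ("node1", "node4", None,  0, 3, "s3cr3t")
#   new_net_id   == ("node1", "node4", 11000, 0, 3, "s3cr3t")
# the "alone" variant omits the port so the device is first brought up
# without any networking configuration.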
10131 iv_names[idx] = (dev, dev.children, new_net_id)
10132 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10134 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10135 logical_id=new_alone_id,
10136 children=dev.children,
10139 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10140 _GetInstanceInfoText(self.instance), False)
10141 except errors.GenericError:
10142 self.cfg.ReleaseDRBDMinors(self.instance.name)
10145 # We have new devices, shutdown the drbd on the old secondary
10146 for idx, dev in enumerate(self.instance.disks):
10147 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10148 self.cfg.SetDiskID(dev, self.target_node)
10149 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10151 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10152 " node: %s" % (idx, msg),
10153 hint=("Please cleanup this device manually as"
10154 " soon as possible"))
10156 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10157 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10158 self.instance.disks)[pnode]
10160 msg = result.fail_msg
10162 # detaches didn't succeed (unlikely)
10163 self.cfg.ReleaseDRBDMinors(self.instance.name)
10164 raise errors.OpExecError("Can't detach the disks from the network on"
10165 " old node: %s" % (msg,))
10167 # if we managed to detach at least one, we update all the disks of
10168 # the instance to point to the new secondary
10169 self.lu.LogInfo("Updating instance configuration")
10170 for dev, _, new_logical_id in iv_names.itervalues():
10171 dev.logical_id = new_logical_id
10172 self.cfg.SetDiskID(dev, self.instance.primary_node)
10174 self.cfg.Update(self.instance, feedback_fn)
10176 # and now perform the drbd attach
10177 self.lu.LogInfo("Attaching primary drbds to new secondary"
10178 " (standalone => connected)")
10179 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10181 self.node_secondary_ip,
10182 self.instance.disks,
10183 self.instance.name,
10185 for to_node, to_result in result.items():
10186 msg = to_result.fail_msg
10188 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10190 hint=("please do a gnt-instance info to see the"
10191 " status of disks"))
10193 if self.early_release:
10194 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10196 self._RemoveOldStorage(self.target_node, iv_names)
10197 # WARNING: we release all node locks here, do not do other RPCs
10198 # than WaitForSync to the primary node
10199 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10200 names=[self.instance.primary_node,
10205 # This can fail as the old devices are degraded and _WaitForSync
10206 # does a combined result over all disks, so we don't check its return value
10207 self.lu.LogStep(cstep, steps_total, "Sync devices")
10209 _WaitForSync(self.lu, self.instance)
10211 # Check all devices manually
10212 self._CheckDevices(self.instance.primary_node, iv_names)
10214 # Step: remove old storage
10215 if not self.early_release:
10216 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10217 self._RemoveOldStorage(self.target_node, iv_names)
10220 class LURepairNodeStorage(NoHooksLU):
10221 """Repairs the volume group on a node.
10226 def CheckArguments(self):
10227 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10229 storage_type = self.op.storage_type
10231 if (constants.SO_FIX_CONSISTENCY not in
10232 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10233 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10234 " repaired" % storage_type,
10235 errors.ECODE_INVAL)
10237 def ExpandNames(self):
10238 self.needed_locks = {
10239 locking.LEVEL_NODE: [self.op.node_name],
10242 def _CheckFaultyDisks(self, instance, node_name):
10243 """Ensure faulty disks abort the opcode or at least warn."""
10245 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10247 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10248 " node '%s'" % (instance.name, node_name),
10249 errors.ECODE_STATE)
10250 except errors.OpPrereqError, err:
10251 if self.op.ignore_consistency:
10252 self.proc.LogWarning(str(err.args[0]))
10256 def CheckPrereq(self):
10257 """Check prerequisites.
10260 # Check whether any instance on this node has faulty disks
10261 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10262 if not inst.admin_up:
10264 check_nodes = set(inst.all_nodes)
10265 check_nodes.discard(self.op.node_name)
10266 for inst_node_name in check_nodes:
10267 self._CheckFaultyDisks(inst, inst_node_name)
10269 def Exec(self, feedback_fn):
10270 feedback_fn("Repairing storage unit '%s' on %s ..." %
10271 (self.op.name, self.op.node_name))
10273 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10274 result = self.rpc.call_storage_execute(self.op.node_name,
10275 self.op.storage_type, st_args,
10277 constants.SO_FIX_CONSISTENCY)
10278 result.Raise("Failed to repair storage unit '%s' on %s" %
10279 (self.op.name, self.op.node_name))
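# As a rough illustration (assuming the usual CLI wrapper around this LU),
# the operation corresponds to something like
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# which re-runs the consistency fix-up for the given storage unit.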
10282 class LUNodeEvacuate(NoHooksLU):
10283 """Evacuates instances off a list of nodes.
10288 def CheckArguments(self):
10289 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10291 def ExpandNames(self):
10292 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10294 if self.op.remote_node is not None:
10295 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10296 assert self.op.remote_node
10298 if self.op.remote_node == self.op.node_name:
10299 raise errors.OpPrereqError("Can not use evacuated node as a new"
10300 " secondary node", errors.ECODE_INVAL)
10302 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10303 raise errors.OpPrereqError("Without the use of an iallocator only"
10304 " secondary instances can be evacuated",
10305 errors.ECODE_INVAL)
10308 self.share_locks = _ShareAll()
10309 self.needed_locks = {
10310 locking.LEVEL_INSTANCE: [],
10311 locking.LEVEL_NODEGROUP: [],
10312 locking.LEVEL_NODE: [],
10315 if self.op.remote_node is None:
10316 # Iallocator will choose any node(s) in the same group
10317 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10319 group_nodes = frozenset([self.op.remote_node])
10321 # Determine nodes to be locked
10322 self.lock_nodes = set([self.op.node_name]) | group_nodes
10324 def _DetermineInstances(self):
10325 """Builds list of instances to operate on.
10328 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10330 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10331 # Primary instances only
10332 inst_fn = _GetNodePrimaryInstances
10333 assert self.op.remote_node is None, \
10334 "Evacuating primary instances requires iallocator"
10335 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10336 # Secondary instances only
10337 inst_fn = _GetNodeSecondaryInstances
10340 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10341 inst_fn = _GetNodeInstances
10343 return inst_fn(self.cfg, self.op.node_name)
10345 def DeclareLocks(self, level):
10346 if level == locking.LEVEL_INSTANCE:
10347 # Lock instances optimistically, needs verification once node and group
10348 # locks have been acquired
10349 self.needed_locks[locking.LEVEL_INSTANCE] = \
10350 set(i.name for i in self._DetermineInstances())
10352 elif level == locking.LEVEL_NODEGROUP:
10353 # Lock node groups optimistically, needs verification once nodes have
10355 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10356 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10358 elif level == locking.LEVEL_NODE:
10359 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10361 def CheckPrereq(self):
10363 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10364 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10365 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10367 assert owned_nodes == self.lock_nodes
10369 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10370 if owned_groups != wanted_groups:
10371 raise errors.OpExecError("Node groups changed since locks were acquired,"
10372 " current groups are '%s', used to be '%s'" %
10373 (utils.CommaJoin(wanted_groups),
10374 utils.CommaJoin(owned_groups)))
10376 # Determine affected instances
10377 self.instances = self._DetermineInstances()
10378 self.instance_names = [i.name for i in self.instances]
10380 if set(self.instance_names) != owned_instances:
10381 raise errors.OpExecError("Instances on node '%s' changed since locks"
10382 " were acquired, current instances are '%s',"
10383 " used to be '%s'" %
10384 (self.op.node_name,
10385 utils.CommaJoin(self.instance_names),
10386 utils.CommaJoin(owned_instances)))
10388 if self.instance_names:
10389 self.LogInfo("Evacuating instances from node '%s': %s",
10391 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10393 self.LogInfo("No instances to evacuate from node '%s'",
10396 if self.op.remote_node is not None:
10397 for i in self.instances:
10398 if i.primary_node == self.op.remote_node:
10399 raise errors.OpPrereqError("Node %s is the primary node of"
10400 " instance %s, cannot use it as"
10402 (self.op.remote_node, i.name),
10403 errors.ECODE_INVAL)
10405 def Exec(self, feedback_fn):
10406 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10408 if not self.instance_names:
10409 # No instances to evacuate
10412 elif self.op.iallocator is not None:
10413 # TODO: Implement relocation to other group
10414 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10415 evac_mode=self.op.mode,
10416 instances=list(self.instance_names))
10418 ial.Run(self.op.iallocator)
10420 if not ial.success:
10421 raise errors.OpPrereqError("Can't compute node evacuation using"
10422 " iallocator '%s': %s" %
10423 (self.op.iallocator, ial.info),
10424 errors.ECODE_NORES)
10426 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10428 elif self.op.remote_node is not None:
10429 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10431 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10432 remote_node=self.op.remote_node,
10434 mode=constants.REPLACE_DISK_CHG,
10435 early_release=self.op.early_release)]
10436 for instance_name in self.instance_names
10440 raise errors.ProgrammerError("No iallocator or remote node")
10442 return ResultWithJobs(jobs)
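# For the remote_node branch above, the submitted jobs are simply one
# single-opcode job per affected instance, roughly (names hypothetical):
#   [[OpInstanceReplaceDisks(instance_name="inst1", remote_node="node9",
#                            mode=constants.REPLACE_DISK_CHG,
#                            early_release=False)],
#    [OpInstanceReplaceDisks(instance_name="inst2", ...)]]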
10445 def _SetOpEarlyRelease(early_release, op):
10446 """Sets C{early_release} flag on opcodes if available.
10450 op.early_release = early_release
10451 except AttributeError:
10452 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10457 def _NodeEvacDest(use_nodes, group, nodes):
10458 """Returns group or nodes depending on caller's choice.
10462 return utils.CommaJoin(nodes)
10467 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10468 """Unpacks the result of change-group and node-evacuate iallocator requests.
10470 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10471 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10473 @type lu: L{LogicalUnit}
10474 @param lu: Logical unit instance
10475 @type alloc_result: tuple/list
10476 @param alloc_result: Result from iallocator
10477 @type early_release: bool
10478 @param early_release: Whether to release locks early if possible
10479 @type use_nodes: bool
10480 @param use_nodes: Whether to display node names instead of groups
10483 (moved, failed, jobs) = alloc_result
10486 lu.LogWarning("Unable to evacuate instances %s",
10487 utils.CommaJoin("%s (%s)" % (name, reason)
10488 for (name, reason) in failed))
10491 lu.LogInfo("Instances to be moved: %s",
10492 utils.CommaJoin("%s (to %s)" %
10493 (name, _NodeEvacDest(use_nodes, group, nodes))
10494 for (name, group, nodes) in moved))
10496 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10497 map(opcodes.OpCode.LoadOpCode, ops))
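# For orientation (illustrative values only), an alloc_result as unpacked
# above has the shape
#   ([("inst1", "group1", ["node2"])],        # moved: (name, group, nodes)
#    [("inst2", "disks are not activated")],  # failed: (name, reason)
#    [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]])  # jobs: serialized ops
# each inner job list is deserialized via opcodes.OpCode.LoadOpCode and gets
# the early_release flag applied where the opcode supports it.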
10501 class LUInstanceGrowDisk(LogicalUnit):
10502 """Grow a disk of an instance.
10505 HPATH = "disk-grow"
10506 HTYPE = constants.HTYPE_INSTANCE
10509 def ExpandNames(self):
10510 self._ExpandAndLockInstance()
10511 self.needed_locks[locking.LEVEL_NODE] = []
10512 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10514 def DeclareLocks(self, level):
10515 if level == locking.LEVEL_NODE:
10516 self._LockInstancesNodes()
10518 def BuildHooksEnv(self):
10519 """Build hooks env.
10521 This runs on the master, the primary and all the secondaries.
10525 "DISK": self.op.disk,
10526 "AMOUNT": self.op.amount,
10528 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10531 def BuildHooksNodes(self):
10532 """Build hooks nodes.
10535 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10538 def CheckPrereq(self):
10539 """Check prerequisites.
10541 This checks that the instance is in the cluster.
10544 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10545 assert instance is not None, \
10546 "Cannot retrieve locked instance %s" % self.op.instance_name
10547 nodenames = list(instance.all_nodes)
10548 for node in nodenames:
10549 _CheckNodeOnline(self, node)
10551 self.instance = instance
10553 if instance.disk_template not in constants.DTS_GROWABLE:
10554 raise errors.OpPrereqError("Instance's disk layout does not support"
10555 " growing", errors.ECODE_INVAL)
10557 self.disk = instance.FindDisk(self.op.disk)
10559 if instance.disk_template not in (constants.DT_FILE,
10560 constants.DT_SHARED_FILE):
10561 # TODO: check the free disk space for file, when that feature will be
10563 _CheckNodesFreeDiskPerVG(self, nodenames,
10564 self.disk.ComputeGrowth(self.op.amount))
10566 def Exec(self, feedback_fn):
10567 """Execute disk grow.
10570 instance = self.instance
10573 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10575 raise errors.OpExecError("Cannot activate block device to grow")
10577 # First run all grow ops in dry-run mode
10578 for node in instance.all_nodes:
10579 self.cfg.SetDiskID(disk, node)
10580 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10581 result.Raise("Grow request failed to node %s" % node)
10583 # We know that (as far as we can test) operations across different
10584 # nodes will succeed, time to run it for real
10585 for node in instance.all_nodes:
10586 self.cfg.SetDiskID(disk, node)
10587 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10588 result.Raise("Grow request failed to node %s" % node)
10590 # TODO: Rewrite code to work properly
10591 # DRBD goes into sync mode for a short amount of time after executing the
10592 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10593 # calling "resize" in sync mode fails. Sleeping for a short amount of
10594 # time is a work-around.
10597 disk.RecordGrow(self.op.amount)
10598 self.cfg.Update(instance, feedback_fn)
10599 if self.op.wait_for_sync:
10600 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10602 self.proc.LogWarning("Disk sync-ing has not returned a good"
10603 " status; please check the instance")
10604 if not instance.admin_up:
10605 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10606 elif not instance.admin_up:
10607 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10608 " not supposed to be running because no wait for"
10609 " sync mode was requested")
10612 class LUInstanceQueryData(NoHooksLU):
10613 """Query runtime instance data.
10618 def ExpandNames(self):
10619 self.needed_locks = {}
10621 # Use locking if requested or when non-static information is wanted
10622 if not (self.op.static or self.op.use_locking):
10623 self.LogWarning("Non-static data requested, locks need to be acquired")
10624 self.op.use_locking = True
10626 if self.op.instances or not self.op.use_locking:
10627 # Expand instance names right here
10628 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10630 # Will use acquired locks
10631 self.wanted_names = None
10633 if self.op.use_locking:
10634 self.share_locks = _ShareAll()
10636 if self.wanted_names is None:
10637 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10639 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10641 self.needed_locks[locking.LEVEL_NODE] = []
10642 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10644 def DeclareLocks(self, level):
10645 if self.op.use_locking and level == locking.LEVEL_NODE:
10646 self._LockInstancesNodes()
10648 def CheckPrereq(self):
10649 """Check prerequisites.
10651 This only checks the optional instance list against the existing names.
10654 if self.wanted_names is None:
10655 assert self.op.use_locking, "Locking was not used"
10656 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10658 self.wanted_instances = \
10659 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10661 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10662 """Returns the status of a block device
10665 if self.op.static or not node:
10668 self.cfg.SetDiskID(dev, node)
10670 result = self.rpc.call_blockdev_find(node, dev)
10674 result.Raise("Can't compute disk status for %s" % instance_name)
10676 status = result.payload
10680 return (status.dev_path, status.major, status.minor,
10681 status.sync_percent, status.estimated_time,
10682 status.is_degraded, status.ldisk_status)
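# The tuple returned above is, for example (values hypothetical):
#   ("/dev/drbd0", 147, 0, 99.5, 3, False, None)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); 147 is merely the usual DRBD device major number.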
10684 def _ComputeDiskStatus(self, instance, snode, dev):
10685 """Compute block device status.
10688 if dev.dev_type in constants.LDS_DRBD:
10689 # we change the snode then (otherwise we use the one passed in)
10690 if dev.logical_id[0] == instance.primary_node:
10691 snode = dev.logical_id[1]
10693 snode = dev.logical_id[0]
10695 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10696 instance.name, dev)
10697 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10700 dev_children = map(compat.partial(self._ComputeDiskStatus,
10707 "iv_name": dev.iv_name,
10708 "dev_type": dev.dev_type,
10709 "logical_id": dev.logical_id,
10710 "physical_id": dev.physical_id,
10711 "pstatus": dev_pstatus,
10712 "sstatus": dev_sstatus,
10713 "children": dev_children,
10718 def Exec(self, feedback_fn):
10719 """Gather and return data"""
10722 cluster = self.cfg.GetClusterInfo()
10724 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10725 for i in self.wanted_instances)
10726 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10727 if self.op.static or pnode.offline:
10728 remote_state = None
10730 self.LogWarning("Primary node %s is marked offline, returning static"
10731 " information only for instance %s" %
10732 (pnode.name, instance.name))
10734 remote_info = self.rpc.call_instance_info(instance.primary_node,
10736 instance.hypervisor)
10737 remote_info.Raise("Error checking node %s" % instance.primary_node)
10738 remote_info = remote_info.payload
10739 if remote_info and "state" in remote_info:
10740 remote_state = "up"
10742 remote_state = "down"
10744 if instance.admin_up:
10745 config_state = "up"
10747 config_state = "down"
10749 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10752 result[instance.name] = {
10753 "name": instance.name,
10754 "config_state": config_state,
10755 "run_state": remote_state,
10756 "pnode": instance.primary_node,
10757 "snodes": instance.secondary_nodes,
10759 # this happens to be the same format used for hooks
10760 "nics": _NICListToTuple(self, instance.nics),
10761 "disk_template": instance.disk_template,
10763 "hypervisor": instance.hypervisor,
10764 "network_port": instance.network_port,
10765 "hv_instance": instance.hvparams,
10766 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10767 "be_instance": instance.beparams,
10768 "be_actual": cluster.FillBE(instance),
10769 "os_instance": instance.osparams,
10770 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10771 "serial_no": instance.serial_no,
10772 "mtime": instance.mtime,
10773 "ctime": instance.ctime,
10774 "uuid": instance.uuid,
10780 class LUInstanceSetParams(LogicalUnit):
10781 """Modifies an instances's parameters.
10784 HPATH = "instance-modify"
10785 HTYPE = constants.HTYPE_INSTANCE
10788 def CheckArguments(self):
10789 if not (self.op.nics or self.op.disks or self.op.disk_template or
10790 self.op.hvparams or self.op.beparams or self.op.os_name):
10791 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10793 if self.op.hvparams:
10794 _CheckGlobalHvParams(self.op.hvparams)
10798 for disk_op, disk_dict in self.op.disks:
10799 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10800 if disk_op == constants.DDM_REMOVE:
10801 disk_addremove += 1
10803 elif disk_op == constants.DDM_ADD:
10804 disk_addremove += 1
10806 if not isinstance(disk_op, int):
10807 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10808 if not isinstance(disk_dict, dict):
10809 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10810 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10812 if disk_op == constants.DDM_ADD:
10813 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10814 if mode not in constants.DISK_ACCESS_SET:
10815 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10816 errors.ECODE_INVAL)
10817 size = disk_dict.get(constants.IDISK_SIZE, None)
10819 raise errors.OpPrereqError("Required disk parameter size missing",
10820 errors.ECODE_INVAL)
10823 except (TypeError, ValueError), err:
10824 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10825 str(err), errors.ECODE_INVAL)
10826 disk_dict[constants.IDISK_SIZE] = size
10828 # modification of disk
10829 if constants.IDISK_SIZE in disk_dict:
10830 raise errors.OpPrereqError("Disk size change not possible, use"
10831 " grow-disk", errors.ECODE_INVAL)
10833 if disk_addremove > 1:
10834 raise errors.OpPrereqError("Only one disk add or remove operation"
10835 " supported at a time", errors.ECODE_INVAL)
10837 if self.op.disks and self.op.disk_template is not None:
10838 raise errors.OpPrereqError("Disk template conversion and other disk"
10839 " changes not supported at the same time",
10840 errors.ECODE_INVAL)
10842 if (self.op.disk_template and
10843 self.op.disk_template in constants.DTS_INT_MIRROR and
10844 self.op.remote_node is None):
10845 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10846 " one requires specifying a secondary node",
10847 errors.ECODE_INVAL)
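# As a reminder (hypothetical values), self.op.disks is a list of
# (operation, parameters) pairs, e.g.
#   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
#                         constants.IDISK_MODE: constants.DISK_RDWR})]
# or, for modifying an existing disk, an integer index instead of DDM_*.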
10851 for nic_op, nic_dict in self.op.nics:
10852 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10853 if nic_op == constants.DDM_REMOVE:
10856 elif nic_op == constants.DDM_ADD:
10859 if not isinstance(nic_op, int):
10860 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10861 if not isinstance(nic_dict, dict):
10862 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10863 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10865 # nic_dict should be a dict
10866 nic_ip = nic_dict.get(constants.INIC_IP, None)
10867 if nic_ip is not None:
10868 if nic_ip.lower() == constants.VALUE_NONE:
10869 nic_dict[constants.INIC_IP] = None
10871 if not netutils.IPAddress.IsValid(nic_ip):
10872 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10873 errors.ECODE_INVAL)
10875 nic_bridge = nic_dict.get("bridge", None)
10876 nic_link = nic_dict.get(constants.INIC_LINK, None)
10877 if nic_bridge and nic_link:
10878 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10879 " at the same time", errors.ECODE_INVAL)
10880 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10881 nic_dict["bridge"] = None
10882 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10883 nic_dict[constants.INIC_LINK] = None
10885 if nic_op == constants.DDM_ADD:
10886 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10887 if nic_mac is None:
10888 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10890 if constants.INIC_MAC in nic_dict:
10891 nic_mac = nic_dict[constants.INIC_MAC]
10892 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10893 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10895 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10896 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10897 " modifying an existing nic",
10898 errors.ECODE_INVAL)
10900 if nic_addremove > 1:
10901 raise errors.OpPrereqError("Only one NIC add or remove operation"
10902 " supported at a time", errors.ECODE_INVAL)
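# Likewise (hypothetical values), self.op.nics is a list of
# (operation, parameters) pairs, e.g.
#   [(constants.DDM_ADD, {constants.INIC_MAC: constants.VALUE_AUTO}),
#    (0, {constants.INIC_LINK: "br0"})]
# where an integer index selects an existing NIC for modification.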
10904 def ExpandNames(self):
10905 self._ExpandAndLockInstance()
10906 self.needed_locks[locking.LEVEL_NODE] = []
10907 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10909 def DeclareLocks(self, level):
10910 if level == locking.LEVEL_NODE:
10911 self._LockInstancesNodes()
10912 if self.op.disk_template and self.op.remote_node:
10913 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10914 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10916 def BuildHooksEnv(self):
10917 """Build hooks env.
10919 This runs on the master, primary and secondaries.
10923 if constants.BE_MEMORY in self.be_new:
10924 args["memory"] = self.be_new[constants.BE_MEMORY]
10925 if constants.BE_VCPUS in self.be_new:
10926 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10927 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10928 # information at all.
10931 nic_override = dict(self.op.nics)
10932 for idx, nic in enumerate(self.instance.nics):
10933 if idx in nic_override:
10934 this_nic_override = nic_override[idx]
10936 this_nic_override = {}
10937 if constants.INIC_IP in this_nic_override:
10938 ip = this_nic_override[constants.INIC_IP]
10941 if constants.INIC_MAC in this_nic_override:
10942 mac = this_nic_override[constants.INIC_MAC]
10945 if idx in self.nic_pnew:
10946 nicparams = self.nic_pnew[idx]
10948 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10949 mode = nicparams[constants.NIC_MODE]
10950 link = nicparams[constants.NIC_LINK]
10951 args["nics"].append((ip, mac, mode, link))
10952 if constants.DDM_ADD in nic_override:
10953 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10954 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10955 nicparams = self.nic_pnew[constants.DDM_ADD]
10956 mode = nicparams[constants.NIC_MODE]
10957 link = nicparams[constants.NIC_LINK]
10958 args["nics"].append((ip, mac, mode, link))
10959 elif constants.DDM_REMOVE in nic_override:
10960 del args["nics"][-1]
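# Each entry appended above is an (ip, mac, mode, link) tuple, for example
# (values hypothetical):
#   ("198.51.100.10", "aa:00:00:12:34:56", "bridged", "xen-br0")
# hooks receive these via the environment built below.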
10962 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10963 if self.op.disk_template:
10964 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10968 def BuildHooksNodes(self):
10969 """Build hooks nodes.
10972 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10975 def CheckPrereq(self):
10976 """Check prerequisites.
10978 This only checks the instance list against the existing names.
10981 # checking the new params on the primary/secondary nodes
10983 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10984 cluster = self.cluster = self.cfg.GetClusterInfo()
10985 assert self.instance is not None, \
10986 "Cannot retrieve locked instance %s" % self.op.instance_name
10987 pnode = instance.primary_node
10988 nodelist = list(instance.all_nodes)
10991 if self.op.os_name and not self.op.force:
10992 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10993 self.op.force_variant)
10994 instance_os = self.op.os_name
10996 instance_os = instance.os
10998 if self.op.disk_template:
10999 if instance.disk_template == self.op.disk_template:
11000 raise errors.OpPrereqError("Instance already has disk template %s" %
11001 instance.disk_template, errors.ECODE_INVAL)
11003 if (instance.disk_template,
11004 self.op.disk_template) not in self._DISK_CONVERSIONS:
11005 raise errors.OpPrereqError("Unsupported disk template conversion from"
11006 " %s to %s" % (instance.disk_template,
11007 self.op.disk_template),
11008 errors.ECODE_INVAL)
11009 _CheckInstanceDown(self, instance, "cannot change disk template")
11010 if self.op.disk_template in constants.DTS_INT_MIRROR:
11011 if self.op.remote_node == pnode:
11012 raise errors.OpPrereqError("Given new secondary node %s is the same"
11013 " as the primary node of the instance" %
11014 self.op.remote_node, errors.ECODE_STATE)
11015 _CheckNodeOnline(self, self.op.remote_node)
11016 _CheckNodeNotDrained(self, self.op.remote_node)
11017 # FIXME: here we assume that the old instance type is DT_PLAIN
11018 assert instance.disk_template == constants.DT_PLAIN
11019 disks = [{constants.IDISK_SIZE: d.size,
11020 constants.IDISK_VG: d.logical_id[0]}
11021 for d in instance.disks]
11022 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11023 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11025 # hvparams processing
11026 if self.op.hvparams:
11027 hv_type = instance.hypervisor
11028 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11029 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11030 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11033 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11034 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11035 self.hv_proposed = self.hv_new = hv_new # the new actual values
11036 self.hv_inst = i_hvdict # the new dict (without defaults)
11038 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11040 self.hv_new = self.hv_inst = {}
11042 # beparams processing
11043 if self.op.beparams:
11044 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11046 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11047 be_new = cluster.SimpleFillBE(i_bedict)
11048 self.be_proposed = self.be_new = be_new # the new actual values
11049 self.be_inst = i_bedict # the new dict (without defaults)
11051 self.be_new = self.be_inst = {}
11052 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11053 be_old = cluster.FillBE(instance)
11055 # CPU param validation -- checking every time a parameter is
11056 # changed to cover all cases where either CPU mask or vcpus have
11058 if (constants.BE_VCPUS in self.be_proposed and
11059 constants.HV_CPU_MASK in self.hv_proposed):
11061 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11062 # Verify mask is consistent with number of vCPUs. Can skip this
11063 # test if only 1 entry in the CPU mask, which means same mask
11064 # is applied to all vCPUs.
11065 if (len(cpu_list) > 1 and
11066 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11067 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11069 (self.be_proposed[constants.BE_VCPUS],
11070 self.hv_proposed[constants.HV_CPU_MASK]),
11071 errors.ECODE_INVAL)
11073 # Only perform this test if a new CPU mask is given
11074 if constants.HV_CPU_MASK in self.hv_new:
11075 # Calculate the largest CPU number requested
11076 max_requested_cpu = max(map(max, cpu_list))
11077 # Check that all of the instance's nodes have enough physical CPUs to
11078 # satisfy the requested CPU mask
11079 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11080 max_requested_cpu + 1, instance.hypervisor)
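    # Worked example (illustrative values): a proposed CPU mask of "0-1:2:3"
    # parsed by utils.ParseMultiCpuMask yields three per-vCPU entries, so
    # BE_VCPUS must be 3; the highest CPU number requested is 3, so every
    # node of the instance must have at least 4 physical CPUs for the mask
    # to be satisfiable.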
11082 # osparams processing
11083 if self.op.osparams:
11084 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11085 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11086 self.os_inst = i_osdict # the new dict (without defaults)
11092 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11093 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11094 mem_check_list = [pnode]
11095 if be_new[constants.BE_AUTO_BALANCE]:
11096       # either we changed auto_balance to yes or it was already enabled before
11097 mem_check_list.extend(instance.secondary_nodes)
11098 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11099 instance.hypervisor)
11100 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11101 instance.hypervisor)
11102 pninfo = nodeinfo[pnode]
11103 msg = pninfo.fail_msg
11105 # Assume the primary node is unreachable and go ahead
11106 self.warn.append("Can't get info from primary node %s: %s" %
11108 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11109 self.warn.append("Node data from primary node %s doesn't contain"
11110 " free memory information" % pnode)
11111 elif instance_info.fail_msg:
11112 self.warn.append("Can't get instance runtime information: %s" %
11113 instance_info.fail_msg)
11115 if instance_info.payload:
11116 current_mem = int(instance_info.payload["memory"])
11118 # Assume instance not running
11119 # (there is a slight race condition here, but it's not very probable,
11120 # and we have no other way to check)
11122 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11123 pninfo.payload["memory_free"])
11125 raise errors.OpPrereqError("This change will prevent the instance"
11126 " from starting, due to %d MB of memory"
11127 " missing on its primary node" % miss_mem,
11128 errors.ECODE_NORES)
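      # Worked example (illustrative numbers): raising BE_MEMORY from 1024 to
      # 4096 MiB while the instance currently uses 1024 MiB and the primary
      # node reports 2048 MiB free gives
      #   miss_mem = 4096 - 1024 - 2048 = 1024 MiB,
      # so the change is rejected with ECODE_NORES.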
11130 if be_new[constants.BE_AUTO_BALANCE]:
11131 for node, nres in nodeinfo.items():
11132 if node not in instance.secondary_nodes:
11134 nres.Raise("Can't get info from secondary node %s" % node,
11135 prereq=True, ecode=errors.ECODE_STATE)
11136 if not isinstance(nres.payload.get("memory_free", None), int):
11137 raise errors.OpPrereqError("Secondary node %s didn't return free"
11138 " memory information" % node,
11139 errors.ECODE_STATE)
11140 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11141 raise errors.OpPrereqError("This change will prevent the instance"
11142 " from failover to its secondary node"
11143 " %s, due to not enough memory" % node,
11144 errors.ECODE_STATE)
11148 self.nic_pinst = {}
11149 for nic_op, nic_dict in self.op.nics:
11150 if nic_op == constants.DDM_REMOVE:
11151 if not instance.nics:
11152 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11153 errors.ECODE_INVAL)
11155 if nic_op != constants.DDM_ADD:
11157 if not instance.nics:
11158 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11159 " no NICs" % nic_op,
11160 errors.ECODE_INVAL)
11161 if nic_op < 0 or nic_op >= len(instance.nics):
11162 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11164 (nic_op, len(instance.nics) - 1),
11165 errors.ECODE_INVAL)
11166 old_nic_params = instance.nics[nic_op].nicparams
11167 old_nic_ip = instance.nics[nic_op].ip
11169 old_nic_params = {}
11172 update_params_dict = dict([(key, nic_dict[key])
11173 for key in constants.NICS_PARAMETERS
11174 if key in nic_dict])
11176 if "bridge" in nic_dict:
11177 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
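        # Compatibility note (illustrative): a legacy request such as
        # {"bridge": "br0"} is folded into the nicparams as
        # {constants.NIC_LINK: "br0"} before the parameters are validated below.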
11179 new_nic_params = _GetUpdatedParams(old_nic_params,
11180 update_params_dict)
11181 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11182 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11183 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11184 self.nic_pinst[nic_op] = new_nic_params
11185 self.nic_pnew[nic_op] = new_filled_nic_params
11186 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11188 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11189 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11190 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11192 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11194 self.warn.append(msg)
11196 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11197 if new_nic_mode == constants.NIC_MODE_ROUTED:
11198 if constants.INIC_IP in nic_dict:
11199 nic_ip = nic_dict[constants.INIC_IP]
11201 nic_ip = old_nic_ip
11203 raise errors.OpPrereqError("Cannot set the nic ip to None"
11204 " on a routed nic", errors.ECODE_INVAL)
11205 if constants.INIC_MAC in nic_dict:
11206 nic_mac = nic_dict[constants.INIC_MAC]
11207 if nic_mac is None:
11208 raise errors.OpPrereqError("Cannot set the nic mac to None",
11209 errors.ECODE_INVAL)
11210 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11211 # otherwise generate the mac
11212 nic_dict[constants.INIC_MAC] = \
11213 self.cfg.GenerateMAC(self.proc.GetECId())
11215 # or validate/reserve the current one
11217 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11218 except errors.ReservationError:
11219 raise errors.OpPrereqError("MAC address %s already in use"
11220 " in cluster" % nic_mac,
11221 errors.ECODE_NOTUNIQUE)
11224 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11225 raise errors.OpPrereqError("Disk operations not supported for"
11226 " diskless instances",
11227 errors.ECODE_INVAL)
11228 for disk_op, _ in self.op.disks:
11229 if disk_op == constants.DDM_REMOVE:
11230 if len(instance.disks) == 1:
11231 raise errors.OpPrereqError("Cannot remove the last disk of"
11232 " an instance", errors.ECODE_INVAL)
11233 _CheckInstanceDown(self, instance, "cannot remove disks")
11235 if (disk_op == constants.DDM_ADD and
11236 len(instance.disks) >= constants.MAX_DISKS):
11237 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11238 " add more" % constants.MAX_DISKS,
11239 errors.ECODE_STATE)
11240 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11242 if disk_op < 0 or disk_op >= len(instance.disks):
11243 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11245 (disk_op, len(instance.disks)),
11246 errors.ECODE_INVAL)
11250 def _ConvertPlainToDrbd(self, feedback_fn):
11251 """Converts an instance from plain to drbd.
11254 feedback_fn("Converting template to drbd")
11255 instance = self.instance
11256 pnode = instance.primary_node
11257 snode = self.op.remote_node
11259 # create a fake disk info for _GenerateDiskTemplate
11260 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11261 constants.IDISK_VG: d.logical_id[0]}
11262 for d in instance.disks]
11263 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11264 instance.name, pnode, [snode],
11265 disk_info, None, None, 0, feedback_fn)
11266 info = _GetInstanceInfoText(instance)
11267     feedback_fn("Creating additional volumes...")
11268 # first, create the missing data and meta devices
11269 for disk in new_disks:
11270 # unfortunately this is... not too nice
11271 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11273 for child in disk.children:
11274 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11275     # at this stage, all new LVs have been created, we can rename the old ones
11277 feedback_fn("Renaming original volumes...")
11278 rename_list = [(o, n.children[0].logical_id)
11279 for (o, n) in zip(instance.disks, new_disks)]
11280 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11281 result.Raise("Failed to rename original LVs")
11283 feedback_fn("Initializing DRBD devices...")
11284 # all child devices are in place, we can now create the DRBD devices
11285 for disk in new_disks:
11286 for node in [pnode, snode]:
11287 f_create = node == pnode
11288 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11290 # at this point, the instance has been modified
11291 instance.disk_template = constants.DT_DRBD8
11292 instance.disks = new_disks
11293 self.cfg.Update(instance, feedback_fn)
11295 # disks are created, waiting for sync
11296 disk_abort = not _WaitForSync(self, instance,
11297 oneshot=not self.op.wait_for_sync)
11299 raise errors.OpExecError("There are some degraded disks for"
11300 " this instance, please cleanup manually")
11302 def _ConvertDrbdToPlain(self, feedback_fn):
11303 """Converts an instance from drbd to plain.
11306 instance = self.instance
11307 assert len(instance.secondary_nodes) == 1
11308 pnode = instance.primary_node
11309 snode = instance.secondary_nodes[0]
11310 feedback_fn("Converting template to plain")
11312 old_disks = instance.disks
11313 new_disks = [d.children[0] for d in old_disks]
11315 # copy over size and mode
11316 for parent, child in zip(old_disks, new_disks):
11317 child.size = parent.size
11318 child.mode = parent.mode
11320 # update instance structure
11321 instance.disks = new_disks
11322 instance.disk_template = constants.DT_PLAIN
11323 self.cfg.Update(instance, feedback_fn)
11325 feedback_fn("Removing volumes on the secondary node...")
11326 for disk in old_disks:
11327 self.cfg.SetDiskID(disk, snode)
11328 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11330 self.LogWarning("Could not remove block device %s on node %s,"
11331 " continuing anyway: %s", disk.iv_name, snode, msg)
11333 feedback_fn("Removing unneeded volumes on the primary node...")
11334 for idx, disk in enumerate(old_disks):
11335 meta = disk.children[1]
11336 self.cfg.SetDiskID(meta, pnode)
11337 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11339 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11340 " continuing anyway: %s", idx, pnode, msg)
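  # Note on the disk tree assumed above (descriptive only): for a DRBD8 disk,
  # disk.children[0] is the data LV and disk.children[1] is the metadata LV;
  # the conversion keeps the data LV on the primary node, removes the old
  # volumes on the secondary node, and then removes the now-unneeded metadata
  # LV on the primary.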
11342 def Exec(self, feedback_fn):
11343 """Modifies an instance.
11345 All parameters take effect only at the next restart of the instance.
11348     # Process the warnings from CheckPrereq here, as we don't have a
11349 # feedback_fn there.
11350 for warn in self.warn:
11351 feedback_fn("WARNING: %s" % warn)
11354 instance = self.instance
11356 for disk_op, disk_dict in self.op.disks:
11357 if disk_op == constants.DDM_REMOVE:
11358 # remove the last disk
11359 device = instance.disks.pop()
11360 device_idx = len(instance.disks)
11361 for node, disk in device.ComputeNodeTree(instance.primary_node):
11362 self.cfg.SetDiskID(disk, node)
11363 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11365 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11366 " continuing anyway", device_idx, node, msg)
11367 result.append(("disk/%d" % device_idx, "remove"))
11368 elif disk_op == constants.DDM_ADD:
11370 if instance.disk_template in (constants.DT_FILE,
11371 constants.DT_SHARED_FILE):
11372 file_driver, file_path = instance.disks[0].logical_id
11373 file_path = os.path.dirname(file_path)
11375 file_driver = file_path = None
11376 disk_idx_base = len(instance.disks)
11377 new_disk = _GenerateDiskTemplate(self,
11378 instance.disk_template,
11379 instance.name, instance.primary_node,
11380 instance.secondary_nodes,
11384 disk_idx_base, feedback_fn)[0]
11385 instance.disks.append(new_disk)
11386 info = _GetInstanceInfoText(instance)
11388 logging.info("Creating volume %s for instance %s",
11389 new_disk.iv_name, instance.name)
11390 # Note: this needs to be kept in sync with _CreateDisks
11392 for node in instance.all_nodes:
11393 f_create = node == instance.primary_node
11395 _CreateBlockDev(self, node, instance, new_disk,
11396 f_create, info, f_create)
11397 except errors.OpExecError, err:
11398 self.LogWarning("Failed to create volume %s (%s) on"
11400 new_disk.iv_name, new_disk, node, err)
11401 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11402 (new_disk.size, new_disk.mode)))
11404 # change a given disk
11405 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11406 result.append(("disk.mode/%d" % disk_op,
11407 disk_dict[constants.IDISK_MODE]))
11409 if self.op.disk_template:
11410 r_shut = _ShutdownInstanceDisks(self, instance)
11412 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11413 " proceed with disk template conversion")
11414 mode = (instance.disk_template, self.op.disk_template)
11416 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11418 self.cfg.ReleaseDRBDMinors(instance.name)
11420 result.append(("disk_template", self.op.disk_template))
11423 for nic_op, nic_dict in self.op.nics:
11424 if nic_op == constants.DDM_REMOVE:
11425 # remove the last nic
11426 del instance.nics[-1]
11427 result.append(("nic.%d" % len(instance.nics), "remove"))
11428 elif nic_op == constants.DDM_ADD:
11429         # mac and bridge should be set by now
11430 mac = nic_dict[constants.INIC_MAC]
11431 ip = nic_dict.get(constants.INIC_IP, None)
11432 nicparams = self.nic_pinst[constants.DDM_ADD]
11433 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11434 instance.nics.append(new_nic)
11435 result.append(("nic.%d" % (len(instance.nics) - 1),
11436 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11437 (new_nic.mac, new_nic.ip,
11438 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11439 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11442 for key in (constants.INIC_MAC, constants.INIC_IP):
11443 if key in nic_dict:
11444 setattr(instance.nics[nic_op], key, nic_dict[key])
11445 if nic_op in self.nic_pinst:
11446 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11447 for key, val in nic_dict.iteritems():
11448 result.append(("nic.%s/%d" % (key, nic_op), val))
11451 if self.op.hvparams:
11452 instance.hvparams = self.hv_inst
11453 for key, val in self.op.hvparams.iteritems():
11454 result.append(("hv/%s" % key, val))
11457 if self.op.beparams:
11458 instance.beparams = self.be_inst
11459 for key, val in self.op.beparams.iteritems():
11460 result.append(("be/%s" % key, val))
11463 if self.op.os_name:
11464 instance.os = self.op.os_name
11467 if self.op.osparams:
11468 instance.osparams = self.os_inst
11469 for key, val in self.op.osparams.iteritems():
11470 result.append(("os/%s" % key, val))
11472 self.cfg.Update(instance, feedback_fn)
11476 _DISK_CONVERSIONS = {
11477 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11478 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
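# Example (illustrative only; exact opcode fields are defined in the opcodes
# module): a disk template conversion is requested via OpInstanceSetParams
# with e.g. disk_template=constants.DT_DRBD8 and remote_node pointing at the
# new secondary; Exec() then dispatches through _DISK_CONVERSIONS using the
# (current template, requested template) pair as the key.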
11482 class LUInstanceChangeGroup(LogicalUnit):
11483 HPATH = "instance-change-group"
11484 HTYPE = constants.HTYPE_INSTANCE
11487 def ExpandNames(self):
11488 self.share_locks = _ShareAll()
11489 self.needed_locks = {
11490 locking.LEVEL_NODEGROUP: [],
11491 locking.LEVEL_NODE: [],
11494 self._ExpandAndLockInstance()
11496 if self.op.target_groups:
11497 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11498 self.op.target_groups)
11500 self.req_target_uuids = None
11502 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11504 def DeclareLocks(self, level):
11505 if level == locking.LEVEL_NODEGROUP:
11506 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11508 if self.req_target_uuids:
11509 lock_groups = set(self.req_target_uuids)
11511 # Lock all groups used by instance optimistically; this requires going
11512 # via the node before it's locked, requiring verification later on
11513 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11514 lock_groups.update(instance_groups)
11516 # No target groups, need to lock all of them
11517 lock_groups = locking.ALL_SET
11519 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11521 elif level == locking.LEVEL_NODE:
11522 if self.req_target_uuids:
11523 # Lock all nodes used by instances
11524 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11525 self._LockInstancesNodes()
11527 # Lock all nodes in all potential target groups
11528 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11529 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11530 member_nodes = [node_name
11531 for group in lock_groups
11532 for node_name in self.cfg.GetNodeGroup(group).members]
11533 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11535 # Lock all nodes as all groups are potential targets
11536 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11538 def CheckPrereq(self):
11539 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11540 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11541 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11543 assert (self.req_target_uuids is None or
11544 owned_groups.issuperset(self.req_target_uuids))
11545 assert owned_instances == set([self.op.instance_name])
11547 # Get instance information
11548 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11550 # Check if node groups for locked instance are still correct
11551 assert owned_nodes.issuperset(self.instance.all_nodes), \
11552 ("Instance %s's nodes changed while we kept the lock" %
11553 self.op.instance_name)
11555 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11558 if self.req_target_uuids:
11559 # User requested specific target groups
11560 self.target_uuids = self.req_target_uuids
11562 # All groups except those used by the instance are potential targets
11563 self.target_uuids = owned_groups - inst_groups
11565 conflicting_groups = self.target_uuids & inst_groups
11566 if conflicting_groups:
11567 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11568 " used by the instance '%s'" %
11569 (utils.CommaJoin(conflicting_groups),
11570 self.op.instance_name),
11571 errors.ECODE_INVAL)
11573 if not self.target_uuids:
11574 raise errors.OpPrereqError("There are no possible target groups",
11575 errors.ECODE_INVAL)
11577 def BuildHooksEnv(self):
11578 """Build hooks env.
11581 assert self.target_uuids
11584 "TARGET_GROUPS": " ".join(self.target_uuids),
11587 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11591 def BuildHooksNodes(self):
11592 """Build hooks nodes.
11595 mn = self.cfg.GetMasterNode()
11596 return ([mn], [mn])
11598 def Exec(self, feedback_fn):
11599 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11601 assert instances == [self.op.instance_name], "Instance not locked"
11603 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11604 instances=instances, target_groups=list(self.target_uuids))
11606 ial.Run(self.op.iallocator)
11608 if not ial.success:
11609 raise errors.OpPrereqError("Can't compute solution for changing group of"
11610 " instance '%s' using iallocator '%s': %s" %
11611 (self.op.instance_name, self.op.iallocator,
11613 errors.ECODE_NORES)
11615 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11617 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11618 " instance '%s'", len(jobs), self.op.instance_name)
11620 return ResultWithJobs(jobs)
11623 class LUBackupQuery(NoHooksLU):
11624 """Query the exports list
11629 def ExpandNames(self):
11630 self.needed_locks = {}
11631 self.share_locks[locking.LEVEL_NODE] = 1
11632 if not self.op.nodes:
11633 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11635 self.needed_locks[locking.LEVEL_NODE] = \
11636 _GetWantedNodes(self, self.op.nodes)
11638 def Exec(self, feedback_fn):
11639 """Compute the list of all the exported system images.
11642 @return: a dictionary with the structure node->(export-list)
11643 where export-list is a list of the instances exported on
11647 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11648 rpcresult = self.rpc.call_export_list(self.nodes)
11650 for node in rpcresult:
11651 if rpcresult[node].fail_msg:
11652 result[node] = False
11654 result[node] = rpcresult[node].payload
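    # Illustrative return value (hypothetical names): a node that failed to
    # answer maps to False, otherwise to its list of exports, e.g.
    #   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}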
11659 class LUBackupPrepare(NoHooksLU):
11660 """Prepares an instance for an export and returns useful information.
11665 def ExpandNames(self):
11666 self._ExpandAndLockInstance()
11668 def CheckPrereq(self):
11669 """Check prerequisites.
11672 instance_name = self.op.instance_name
11674 self.instance = self.cfg.GetInstanceInfo(instance_name)
11675 assert self.instance is not None, \
11676 "Cannot retrieve locked instance %s" % self.op.instance_name
11677 _CheckNodeOnline(self, self.instance.primary_node)
11679 self._cds = _GetClusterDomainSecret()
11681 def Exec(self, feedback_fn):
11682 """Prepares an instance for an export.
11685 instance = self.instance
11687 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11688 salt = utils.GenerateSecret(8)
11690 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11691 result = self.rpc.call_x509_cert_create(instance.primary_node,
11692 constants.RIE_CERT_VALIDITY)
11693 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11695 (name, cert_pem) = result.payload
11697 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11701 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11702 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11704 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11710 class LUBackupExport(LogicalUnit):
11711 """Export an instance to an image in the cluster.
11714 HPATH = "instance-export"
11715 HTYPE = constants.HTYPE_INSTANCE
11718 def CheckArguments(self):
11719 """Check the arguments.
11722 self.x509_key_name = self.op.x509_key_name
11723 self.dest_x509_ca_pem = self.op.destination_x509_ca
11725 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11726 if not self.x509_key_name:
11727 raise errors.OpPrereqError("Missing X509 key name for encryption",
11728 errors.ECODE_INVAL)
11730 if not self.dest_x509_ca_pem:
11731 raise errors.OpPrereqError("Missing destination X509 CA",
11732 errors.ECODE_INVAL)
11734 def ExpandNames(self):
11735 self._ExpandAndLockInstance()
11737 # Lock all nodes for local exports
11738 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11739 # FIXME: lock only instance primary and destination node
11741       # Sad but true: for now we have to lock all nodes, as we don't know where
11742 # the previous export might be, and in this LU we search for it and
11743 # remove it from its current node. In the future we could fix this by:
11744 # - making a tasklet to search (share-lock all), then create the
11745       #   new one, then one to remove it afterwards
11746 # - removing the removal operation altogether
11747 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11749 def DeclareLocks(self, level):
11750 """Last minute lock declaration."""
11751 # All nodes are locked anyway, so nothing to do here.
11753 def BuildHooksEnv(self):
11754 """Build hooks env.
11756 This will run on the master, primary node and target node.
11760 "EXPORT_MODE": self.op.mode,
11761 "EXPORT_NODE": self.op.target_node,
11762 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11763 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11764 # TODO: Generic function for boolean env variables
11765 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11768 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11772 def BuildHooksNodes(self):
11773 """Build hooks nodes.
11776 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11778 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11779 nl.append(self.op.target_node)
11783 def CheckPrereq(self):
11784 """Check prerequisites.
11786 This checks that the instance and node names are valid.
11789 instance_name = self.op.instance_name
11791 self.instance = self.cfg.GetInstanceInfo(instance_name)
11792 assert self.instance is not None, \
11793 "Cannot retrieve locked instance %s" % self.op.instance_name
11794 _CheckNodeOnline(self, self.instance.primary_node)
11796 if (self.op.remove_instance and self.instance.admin_up and
11797 not self.op.shutdown):
11798 raise errors.OpPrereqError("Can not remove instance without shutting it"
11801 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11802 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11803 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11804 assert self.dst_node is not None
11806 _CheckNodeOnline(self, self.dst_node.name)
11807 _CheckNodeNotDrained(self, self.dst_node.name)
11810 self.dest_disk_info = None
11811 self.dest_x509_ca = None
11813 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11814 self.dst_node = None
11816 if len(self.op.target_node) != len(self.instance.disks):
11817 raise errors.OpPrereqError(("Received destination information for %s"
11818 " disks, but instance %s has %s disks") %
11819 (len(self.op.target_node), instance_name,
11820 len(self.instance.disks)),
11821 errors.ECODE_INVAL)
11823 cds = _GetClusterDomainSecret()
11825 # Check X509 key name
11827 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11828 except (TypeError, ValueError), err:
11829 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11831 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11832 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11833 errors.ECODE_INVAL)
11835 # Load and verify CA
11837 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11838 except OpenSSL.crypto.Error, err:
11839 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11840 (err, ), errors.ECODE_INVAL)
11842 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11843 if errcode is not None:
11844 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11845 (msg, ), errors.ECODE_INVAL)
11847 self.dest_x509_ca = cert
11849 # Verify target information
11851 for idx, disk_data in enumerate(self.op.target_node):
11853 (host, port, magic) = \
11854 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11855 except errors.GenericError, err:
11856 raise errors.OpPrereqError("Target info for disk %s: %s" %
11857 (idx, err), errors.ECODE_INVAL)
11859 disk_info.append((host, port, magic))
11861 assert len(disk_info) == len(self.op.target_node)
11862 self.dest_disk_info = disk_info
11865 raise errors.ProgrammerError("Unhandled export mode %r" %
11868 # instance disk type verification
11869 # TODO: Implement export support for file-based disks
11870 for disk in self.instance.disks:
11871 if disk.dev_type == constants.LD_FILE:
11872 raise errors.OpPrereqError("Export not supported for instances with"
11873 " file-based disks", errors.ECODE_INVAL)
11875 def _CleanupExports(self, feedback_fn):
11876     """Removes exports of the current instance from all other nodes.
11878 If an instance in a cluster with nodes A..D was exported to node C, its
11879 exports will be removed from the nodes A, B and D.
11882 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11884 nodelist = self.cfg.GetNodeList()
11885 nodelist.remove(self.dst_node.name)
11887     # on one-node clusters nodelist will be empty after the removal;
11888     # if we proceeded, the backup would be removed because OpBackupQuery
11889 # substitutes an empty list with the full cluster node list.
11890 iname = self.instance.name
11892 feedback_fn("Removing old exports for instance %s" % iname)
11893 exportlist = self.rpc.call_export_list(nodelist)
11894 for node in exportlist:
11895 if exportlist[node].fail_msg:
11897 if iname in exportlist[node].payload:
11898 msg = self.rpc.call_export_remove(node, iname).fail_msg
11900 self.LogWarning("Could not remove older export for instance %s"
11901 " on node %s: %s", iname, node, msg)
11903 def Exec(self, feedback_fn):
11904 """Export an instance to an image in the cluster.
11907 assert self.op.mode in constants.EXPORT_MODES
11909 instance = self.instance
11910 src_node = instance.primary_node
11912 if self.op.shutdown:
11913 # shutdown the instance, but not the disks
11914 feedback_fn("Shutting down instance %s" % instance.name)
11915 result = self.rpc.call_instance_shutdown(src_node, instance,
11916 self.op.shutdown_timeout)
11917 # TODO: Maybe ignore failures if ignore_remove_failures is set
11918 result.Raise("Could not shutdown instance %s on"
11919 " node %s" % (instance.name, src_node))
11921 # set the disks ID correctly since call_instance_start needs the
11922 # correct drbd minor to create the symlinks
11923 for disk in instance.disks:
11924 self.cfg.SetDiskID(disk, src_node)
11926 activate_disks = (not instance.admin_up)
11929       # Activate the instance disks if we're exporting a stopped instance
11930 feedback_fn("Activating disks for %s" % instance.name)
11931 _StartInstanceDisks(self, instance, None)
11934 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11937 helper.CreateSnapshots()
11939 if (self.op.shutdown and instance.admin_up and
11940 not self.op.remove_instance):
11941 assert not activate_disks
11942 feedback_fn("Starting instance %s" % instance.name)
11943 result = self.rpc.call_instance_start(src_node,
11944 (instance, None, None), False)
11945 msg = result.fail_msg
11947 feedback_fn("Failed to start instance: %s" % msg)
11948 _ShutdownInstanceDisks(self, instance)
11949 raise errors.OpExecError("Could not start instance: %s" % msg)
11951 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11952 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11953 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11954 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11955 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11957 (key_name, _, _) = self.x509_key_name
11960 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11963 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11964 key_name, dest_ca_pem,
11969 # Check for backwards compatibility
11970 assert len(dresults) == len(instance.disks)
11971 assert compat.all(isinstance(i, bool) for i in dresults), \
11972 "Not all results are boolean: %r" % dresults
11976 feedback_fn("Deactivating disks for %s" % instance.name)
11977 _ShutdownInstanceDisks(self, instance)
11979 if not (compat.all(dresults) and fin_resu):
11982 failures.append("export finalization")
11983 if not compat.all(dresults):
11984 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11986 failures.append("disk export: disk(s) %s" % fdsk)
11988 raise errors.OpExecError("Export failed, errors in %s" %
11989 utils.CommaJoin(failures))
11991 # At this point, the export was successful, we can cleanup/finish
11993 # Remove instance if requested
11994 if self.op.remove_instance:
11995 feedback_fn("Removing instance %s" % instance.name)
11996 _RemoveInstance(self, feedback_fn, instance,
11997 self.op.ignore_remove_failures)
11999 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12000 self._CleanupExports(feedback_fn)
12002 return fin_resu, dresults
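    # Illustrative return value: fin_resu is the overall finalization status
    # and dresults holds one boolean per instance disk, e.g.
    #   (True, [True, True])
    # for a two-disk instance that was exported successfully.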
12005 class LUBackupRemove(NoHooksLU):
12006 """Remove exports related to the named instance.
12011 def ExpandNames(self):
12012 self.needed_locks = {}
12013 # We need all nodes to be locked in order for RemoveExport to work, but we
12014 # don't need to lock the instance itself, as nothing will happen to it (and
12015     # we can also remove exports for an already-removed instance)
12016 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12018 def Exec(self, feedback_fn):
12019 """Remove any export.
12022 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12023 # If the instance was not found we'll try with the name that was passed in.
12024 # This will only work if it was an FQDN, though.
12026 if not instance_name:
12028 instance_name = self.op.instance_name
12030 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12031 exportlist = self.rpc.call_export_list(locked_nodes)
12033 for node in exportlist:
12034 msg = exportlist[node].fail_msg
12036 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12038 if instance_name in exportlist[node].payload:
12040 result = self.rpc.call_export_remove(node, instance_name)
12041 msg = result.fail_msg
12043 logging.error("Could not remove export for instance %s"
12044 " on node %s: %s", instance_name, node, msg)
12046 if fqdn_warn and not found:
12047 feedback_fn("Export not found. If trying to remove an export belonging"
12048 " to a deleted instance please use its Fully Qualified"
12052 class LUGroupAdd(LogicalUnit):
12053 """Logical unit for creating node groups.
12056 HPATH = "group-add"
12057 HTYPE = constants.HTYPE_GROUP
12060 def ExpandNames(self):
12061 # We need the new group's UUID here so that we can create and acquire the
12062 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12063 # that it should not check whether the UUID exists in the configuration.
12064 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12065 self.needed_locks = {}
12066 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12068 def CheckPrereq(self):
12069 """Check prerequisites.
12071     This checks that the given group name is not already an existing node group.
12076 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12077 except errors.OpPrereqError:
12080 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12081 " node group (UUID: %s)" %
12082 (self.op.group_name, existing_uuid),
12083 errors.ECODE_EXISTS)
12085 if self.op.ndparams:
12086 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12088 def BuildHooksEnv(self):
12089 """Build hooks env.
12093 "GROUP_NAME": self.op.group_name,
12096 def BuildHooksNodes(self):
12097 """Build hooks nodes.
12100 mn = self.cfg.GetMasterNode()
12101 return ([mn], [mn])
12103 def Exec(self, feedback_fn):
12104 """Add the node group to the cluster.
12107 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12108 uuid=self.group_uuid,
12109 alloc_policy=self.op.alloc_policy,
12110 ndparams=self.op.ndparams)
12112 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12113 del self.remove_locks[locking.LEVEL_NODEGROUP]
12116 class LUGroupAssignNodes(NoHooksLU):
12117 """Logical unit for assigning nodes to groups.
12122 def ExpandNames(self):
12123 # These raise errors.OpPrereqError on their own:
12124 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12125 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12127 # We want to lock all the affected nodes and groups. We have readily
12128 # available the list of nodes, and the *destination* group. To gather the
12129 # list of "source" groups, we need to fetch node information later on.
12130 self.needed_locks = {
12131 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12132 locking.LEVEL_NODE: self.op.nodes,
12135 def DeclareLocks(self, level):
12136 if level == locking.LEVEL_NODEGROUP:
12137 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12139 # Try to get all affected nodes' groups without having the group or node
12140 # lock yet. Needs verification later in the code flow.
12141 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12143 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12145 def CheckPrereq(self):
12146 """Check prerequisites.
12149 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12150 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12151 frozenset(self.op.nodes))
12153 expected_locks = (set([self.group_uuid]) |
12154 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12155 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12156 if actual_locks != expected_locks:
12157 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12158 " current groups are '%s', used to be '%s'" %
12159 (utils.CommaJoin(expected_locks),
12160 utils.CommaJoin(actual_locks)))
12162 self.node_data = self.cfg.GetAllNodesInfo()
12163 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12164 instance_data = self.cfg.GetAllInstancesInfo()
12166 if self.group is None:
12167 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12168 (self.op.group_name, self.group_uuid))
12170 (new_splits, previous_splits) = \
12171 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12172 for node in self.op.nodes],
12173 self.node_data, instance_data)
12176 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12178 if not self.op.force:
12179 raise errors.OpExecError("The following instances get split by this"
12180 " change and --force was not given: %s" %
12183 self.LogWarning("This operation will split the following instances: %s",
12186 if previous_splits:
12187 self.LogWarning("In addition, these already-split instances continue"
12188 " to be split across groups: %s",
12189 utils.CommaJoin(utils.NiceSort(previous_splits)))
12191 def Exec(self, feedback_fn):
12192 """Assign nodes to a new group.
12195 for node in self.op.nodes:
12196 self.node_data[node].group = self.group_uuid
12198 # FIXME: Depends on side-effects of modifying the result of
12199 # C{cfg.GetAllNodesInfo}
12201 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12204 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12205 """Check for split instances after a node assignment.
12207 This method considers a series of node assignments as an atomic operation,
12208   and returns information about split instances after applying the set of changes.
12211 In particular, it returns information about newly split instances, and
12212 instances that were already split, and remain so after the change.
12214   Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
12217 @type changes: list of (node_name, new_group_uuid) pairs.
12218 @param changes: list of node assignments to consider.
12219 @param node_data: a dict with data for all nodes
12220 @param instance_data: a dict with all instances to consider
12221 @rtype: a two-tuple
12222   @return: a list of instances that were previously okay and end up split as a
12223 consequence of this change, and a list of instances that were previously
12224 split and this change does not fix.
12227 changed_nodes = dict((node, group) for node, group in changes
12228 if node_data[node].group != group)
12230 all_split_instances = set()
12231 previously_split_instances = set()
12233 def InstanceNodes(instance):
12234 return [instance.primary_node] + list(instance.secondary_nodes)
12236 for inst in instance_data.values():
12237 if inst.disk_template not in constants.DTS_INT_MIRROR:
12240 instance_nodes = InstanceNodes(inst)
12242 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12243 previously_split_instances.add(inst.name)
12245 if len(set(changed_nodes.get(node, node_data[node].group)
12246 for node in instance_nodes)) > 1:
12247 all_split_instances.add(inst.name)
12249 return (list(all_split_instances - previously_split_instances),
12250 list(previously_split_instances & all_split_instances))
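# Worked example (hypothetical data): with node1 and node2 in group "g1" and
# node3 in group "g2", a DRBD instance on (node1, node2) is reported as newly
# split by changes=[("node2", "g2")], while an instance already spanning
# (node1, node3) before the change ends up in the previously-split list.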
12253 class _GroupQuery(_QueryBase):
12254 FIELDS = query.GROUP_FIELDS
12256 def ExpandNames(self, lu):
12257 lu.needed_locks = {}
12259 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12260 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12263 self.wanted = [name_to_uuid[name]
12264 for name in utils.NiceSort(name_to_uuid.keys())]
12266       # Accept the requested names to be either group names or UUIDs.
12269 all_uuid = frozenset(self._all_groups.keys())
12271 for name in self.names:
12272 if name in all_uuid:
12273 self.wanted.append(name)
12274 elif name in name_to_uuid:
12275 self.wanted.append(name_to_uuid[name])
12277 missing.append(name)
12280 raise errors.OpPrereqError("Some groups do not exist: %s" %
12281 utils.CommaJoin(missing),
12282 errors.ECODE_NOENT)
12284 def DeclareLocks(self, lu, level):
12287 def _GetQueryData(self, lu):
12288 """Computes the list of node groups and their attributes.
12291 do_nodes = query.GQ_NODE in self.requested_data
12292 do_instances = query.GQ_INST in self.requested_data
12294 group_to_nodes = None
12295 group_to_instances = None
12297 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12298 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12299 # latter GetAllInstancesInfo() is not enough, for we have to go through
12300 # instance->node. Hence, we will need to process nodes even if we only need
12301 # instance information.
12302 if do_nodes or do_instances:
12303 all_nodes = lu.cfg.GetAllNodesInfo()
12304 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12307 for node in all_nodes.values():
12308 if node.group in group_to_nodes:
12309 group_to_nodes[node.group].append(node.name)
12310 node_to_group[node.name] = node.group
12313 all_instances = lu.cfg.GetAllInstancesInfo()
12314 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12316 for instance in all_instances.values():
12317 node = instance.primary_node
12318 if node in node_to_group:
12319 group_to_instances[node_to_group[node]].append(instance.name)
12322 # Do not pass on node information if it was not requested.
12323 group_to_nodes = None
12325 return query.GroupQueryData([self._all_groups[uuid]
12326 for uuid in self.wanted],
12327 group_to_nodes, group_to_instances)
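    # Illustrative shape of the mappings built above (hypothetical UUIDs and
    # names):
    #   group_to_nodes     = {"uuid-g1": ["node1", "node2"], "uuid-g2": ["node3"]}
    #   group_to_instances = {"uuid-g1": ["inst1"], "uuid-g2": []}
    # Either mapping stays None when the corresponding data was not requested.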
12330 class LUGroupQuery(NoHooksLU):
12331 """Logical unit for querying node groups.
12336 def CheckArguments(self):
12337 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12338 self.op.output_fields, False)
12340 def ExpandNames(self):
12341 self.gq.ExpandNames(self)
12343 def DeclareLocks(self, level):
12344 self.gq.DeclareLocks(self, level)
12346 def Exec(self, feedback_fn):
12347 return self.gq.OldStyleQuery(self)
12350 class LUGroupSetParams(LogicalUnit):
12351 """Modifies the parameters of a node group.
12354 HPATH = "group-modify"
12355 HTYPE = constants.HTYPE_GROUP
12358 def CheckArguments(self):
12361 self.op.alloc_policy,
12364 if all_changes.count(None) == len(all_changes):
12365 raise errors.OpPrereqError("Please pass at least one modification",
12366 errors.ECODE_INVAL)
12368 def ExpandNames(self):
12369 # This raises errors.OpPrereqError on its own:
12370 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12372 self.needed_locks = {
12373 locking.LEVEL_NODEGROUP: [self.group_uuid],
12376 def CheckPrereq(self):
12377 """Check prerequisites.
12380 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12382 if self.group is None:
12383 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12384 (self.op.group_name, self.group_uuid))
12386 if self.op.ndparams:
12387 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12388 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12389 self.new_ndparams = new_ndparams
12391 def BuildHooksEnv(self):
12392 """Build hooks env.
12396 "GROUP_NAME": self.op.group_name,
12397 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12400 def BuildHooksNodes(self):
12401 """Build hooks nodes.
12404 mn = self.cfg.GetMasterNode()
12405 return ([mn], [mn])
12407 def Exec(self, feedback_fn):
12408 """Modifies the node group.
12413 if self.op.ndparams:
12414 self.group.ndparams = self.new_ndparams
12415 result.append(("ndparams", str(self.group.ndparams)))
12417 if self.op.alloc_policy:
12418 self.group.alloc_policy = self.op.alloc_policy
12420 self.cfg.Update(self.group, feedback_fn)
12424 class LUGroupRemove(LogicalUnit):
12425 HPATH = "group-remove"
12426 HTYPE = constants.HTYPE_GROUP
12429 def ExpandNames(self):
12430     # This will raise errors.OpPrereqError on its own:
12431 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12432 self.needed_locks = {
12433 locking.LEVEL_NODEGROUP: [self.group_uuid],
12436 def CheckPrereq(self):
12437 """Check prerequisites.
12439     This checks that the given group name exists as a node group, that it is
12440     empty (i.e., contains no nodes), and that it is not the last group in the cluster.
12444 # Verify that the group is empty.
12445 group_nodes = [node.name
12446 for node in self.cfg.GetAllNodesInfo().values()
12447 if node.group == self.group_uuid]
12450 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12452 (self.op.group_name,
12453 utils.CommaJoin(utils.NiceSort(group_nodes))),
12454 errors.ECODE_STATE)
12456 # Verify the cluster would not be left group-less.
12457 if len(self.cfg.GetNodeGroupList()) == 1:
12458 raise errors.OpPrereqError("Group '%s' is the only group,"
12459 " cannot be removed" %
12460 self.op.group_name,
12461 errors.ECODE_STATE)
12463 def BuildHooksEnv(self):
12464 """Build hooks env.
12468 "GROUP_NAME": self.op.group_name,
12471 def BuildHooksNodes(self):
12472 """Build hooks nodes.
12475 mn = self.cfg.GetMasterNode()
12476 return ([mn], [mn])
12478 def Exec(self, feedback_fn):
12479 """Remove the node group.
12483 self.cfg.RemoveNodeGroup(self.group_uuid)
12484 except errors.ConfigurationError:
12485 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12486 (self.op.group_name, self.group_uuid))
12488 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12491 class LUGroupRename(LogicalUnit):
12492 HPATH = "group-rename"
12493 HTYPE = constants.HTYPE_GROUP
12496 def ExpandNames(self):
12497 # This raises errors.OpPrereqError on its own:
12498 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12500 self.needed_locks = {
12501 locking.LEVEL_NODEGROUP: [self.group_uuid],
12504 def CheckPrereq(self):
12505 """Check prerequisites.
12507     Ensures the requested new name is not already in use.
12511 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12512 except errors.OpPrereqError:
12515 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12516 " node group (UUID: %s)" %
12517 (self.op.new_name, new_name_uuid),
12518 errors.ECODE_EXISTS)
12520 def BuildHooksEnv(self):
12521 """Build hooks env.
12525 "OLD_NAME": self.op.group_name,
12526 "NEW_NAME": self.op.new_name,
12529 def BuildHooksNodes(self):
12530 """Build hooks nodes.
12533 mn = self.cfg.GetMasterNode()
12535 all_nodes = self.cfg.GetAllNodesInfo()
12536 all_nodes.pop(mn, None)
12539 run_nodes.extend(node.name for node in all_nodes.values()
12540 if node.group == self.group_uuid)
12542 return (run_nodes, run_nodes)
12544 def Exec(self, feedback_fn):
12545 """Rename the node group.
12548 group = self.cfg.GetNodeGroup(self.group_uuid)
12551 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12552 (self.op.group_name, self.group_uuid))
12554 group.name = self.op.new_name
12555 self.cfg.Update(group, feedback_fn)
12557 return self.op.new_name
12560 class LUGroupEvacuate(LogicalUnit):
12561 HPATH = "group-evacuate"
12562 HTYPE = constants.HTYPE_GROUP
12565 def ExpandNames(self):
12566 # This raises errors.OpPrereqError on its own:
12567 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12569 if self.op.target_groups:
12570 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12571 self.op.target_groups)
12573 self.req_target_uuids = []
12575 if self.group_uuid in self.req_target_uuids:
12576 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12577 " as a target group (targets are %s)" %
12579 utils.CommaJoin(self.req_target_uuids)),
12580 errors.ECODE_INVAL)
12582 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12584 self.share_locks = _ShareAll()
12585 self.needed_locks = {
12586 locking.LEVEL_INSTANCE: [],
12587 locking.LEVEL_NODEGROUP: [],
12588 locking.LEVEL_NODE: [],
12591 def DeclareLocks(self, level):
12592 if level == locking.LEVEL_INSTANCE:
12593 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12595 # Lock instances optimistically, needs verification once node and group
12596 # locks have been acquired
12597 self.needed_locks[locking.LEVEL_INSTANCE] = \
12598 self.cfg.GetNodeGroupInstances(self.group_uuid)
12600 elif level == locking.LEVEL_NODEGROUP:
12601 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12603 if self.req_target_uuids:
12604 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12606 # Lock all groups used by instances optimistically; this requires going
12607 # via the node before it's locked, requiring verification later on
12608 lock_groups.update(group_uuid
12609 for instance_name in
12610 self.owned_locks(locking.LEVEL_INSTANCE)
12612 self.cfg.GetInstanceNodeGroups(instance_name))
12614 # No target groups, need to lock all of them
12615 lock_groups = locking.ALL_SET
12617 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12619 elif level == locking.LEVEL_NODE:
12620 # This will only lock the nodes in the group to be evacuated which
12621 # contain actual instances
12622 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12623 self._LockInstancesNodes()
12625 # Lock all nodes in group to be evacuated and target groups
12626 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12627 assert self.group_uuid in owned_groups
12628 member_nodes = [node_name
12629 for group in owned_groups
12630 for node_name in self.cfg.GetNodeGroup(group).members]
12631 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12633 def CheckPrereq(self):
12634 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12635 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12636 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12638 assert owned_groups.issuperset(self.req_target_uuids)
12639 assert self.group_uuid in owned_groups
12641 # Check if locked instances are still correct
12642 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12644 # Get instance information
12645 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12647 # Check if node groups for locked instances are still correct
12648 for instance_name in owned_instances:
12649 inst = self.instances[instance_name]
12650 assert owned_nodes.issuperset(inst.all_nodes), \
12651 "Instance %s's nodes changed while we kept the lock" % instance_name
12653 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12656 assert self.group_uuid in inst_groups, \
12657 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12659 if self.req_target_uuids:
12660 # User requested specific target groups
12661 self.target_uuids = self.req_target_uuids
12663 # All groups except the one to be evacuated are potential targets
12664 self.target_uuids = [group_uuid for group_uuid in owned_groups
12665 if group_uuid != self.group_uuid]
12667 if not self.target_uuids:
12668 raise errors.OpPrereqError("There are no possible target groups",
12669 errors.ECODE_INVAL)
12671 def BuildHooksEnv(self):
12672 """Build hooks env.
12676 "GROUP_NAME": self.op.group_name,
12677 "TARGET_GROUPS": " ".join(self.target_uuids),
12680 def BuildHooksNodes(self):
12681 """Build hooks nodes.
12684 mn = self.cfg.GetMasterNode()
12686 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12688 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12690 return (run_nodes, run_nodes)
12692 def Exec(self, feedback_fn):
12693 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12695 assert self.group_uuid not in self.target_uuids
12697 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12698 instances=instances, target_groups=self.target_uuids)
12700 ial.Run(self.op.iallocator)
12702 if not ial.success:
12703 raise errors.OpPrereqError("Can't compute group evacuation using"
12704 " iallocator '%s': %s" %
12705 (self.op.iallocator, ial.info),
12706 errors.ECODE_NORES)
12708 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12710 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12711 len(jobs), self.op.group_name)
12713 return ResultWithJobs(jobs)
12716 class TagsLU(NoHooksLU): # pylint: disable=W0223
12717 """Generic tags LU.
12719 This is an abstract class which is the parent of all the other tags LUs.
12722 def ExpandNames(self):
12723 self.group_uuid = None
12724 self.needed_locks = {}
12725 if self.op.kind == constants.TAG_NODE:
12726 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12727 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12728 elif self.op.kind == constants.TAG_INSTANCE:
12729 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12730 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12731 elif self.op.kind == constants.TAG_NODEGROUP:
12732 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12734 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12735 # not possible to acquire the BGL based on opcode parameters)
12737 def CheckPrereq(self):
12738 """Check prerequisites.
12741 if self.op.kind == constants.TAG_CLUSTER:
12742 self.target = self.cfg.GetClusterInfo()
12743 elif self.op.kind == constants.TAG_NODE:
12744 self.target = self.cfg.GetNodeInfo(self.op.name)
12745 elif self.op.kind == constants.TAG_INSTANCE:
12746 self.target = self.cfg.GetInstanceInfo(self.op.name)
12747 elif self.op.kind == constants.TAG_NODEGROUP:
12748 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12750 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12751 str(self.op.kind), errors.ECODE_INVAL)
12754 class LUTagsGet(TagsLU):
12755 """Returns the tags of a given object.
12760 def ExpandNames(self):
12761 TagsLU.ExpandNames(self)
12763 # Share locks as this is only a read operation
12764 self.share_locks = _ShareAll()
12766 def Exec(self, feedback_fn):
12767 """Returns the tag list.
12770 return list(self.target.GetTags())
12773 class LUTagsSearch(NoHooksLU):
12774 """Searches the tags for a given pattern.
12779 def ExpandNames(self):
12780 self.needed_locks = {}
12782 def CheckPrereq(self):
12783 """Check prerequisites.
12785     This checks the passed pattern for validity by compiling it.
12789 self.re = re.compile(self.op.pattern)
12790 except re.error, err:
12791 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12792 (self.op.pattern, err), errors.ECODE_INVAL)
12794 def Exec(self, feedback_fn):
12795 """Returns the tag list.
12799 tgts = [("/cluster", cfg.GetClusterInfo())]
12800 ilist = cfg.GetAllInstancesInfo().values()
12801 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12802 nlist = cfg.GetAllNodesInfo().values()
12803 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12804 tgts.extend(("/nodegroup/%s" % n.name, n)
12805 for n in cfg.GetAllNodeGroupsInfo().values())
12807 for path, target in tgts:
12808 for tag in target.GetTags():
12809 if self.re.search(tag):
12810 results.append((path, tag))
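    # Illustrative result shape (hypothetical names and tags):
    #   [("/cluster", "production"), ("/instances/inst1.example.com", "web")]
    # i.e. a list of (path, tag) pairs for every tag matching the pattern.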
12814 class LUTagsSet(TagsLU):
12815 """Sets a tag on a given object.
12820 def CheckPrereq(self):
12821 """Check prerequisites.
12823 This checks the type and length of the tag name and value.
12826 TagsLU.CheckPrereq(self)
12827 for tag in self.op.tags:
12828 objects.TaggableObject.ValidateTag(tag)
12830 def Exec(self, feedback_fn):
12835 for tag in self.op.tags:
12836 self.target.AddTag(tag)
12837 except errors.TagError, err:
12838 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12839 self.cfg.Update(self.target, feedback_fn)
12842 class LUTagsDel(TagsLU):
12843 """Delete a list of tags from a given object.
12848 def CheckPrereq(self):
12849 """Check prerequisites.
12851 This checks that we have the given tag.
12854 TagsLU.CheckPrereq(self)
12855 for tag in self.op.tags:
12856 objects.TaggableObject.ValidateTag(tag)
12857 del_tags = frozenset(self.op.tags)
12858 cur_tags = self.target.GetTags()
12860 diff_tags = del_tags - cur_tags
12862 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12863 raise errors.OpPrereqError("Tag(s) %s not found" %
12864 (utils.CommaJoin(diff_names), ),
12865 errors.ECODE_NOENT)
12867 def Exec(self, feedback_fn):
12868 """Remove the tag from the object.
12871 for tag in self.op.tags:
12872 self.target.RemoveTag(tag)
12873 self.cfg.Update(self.target, feedback_fn)
12876 class LUTestDelay(NoHooksLU):
12877 """Sleep for a specified amount of time.
12879   This LU sleeps on the master and/or nodes for a specified amount of time.
12885 def ExpandNames(self):
12886 """Expand names and set required locks.
12888 This expands the node list, if any.
12891 self.needed_locks = {}
12892 if self.op.on_nodes:
12893 # _GetWantedNodes can be used here, but is not always appropriate to use
12894 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12895 # more information.
12896 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12897 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12899 def _TestDelay(self):
12900 """Do the actual sleep.
12903 if self.op.on_master:
12904 if not utils.TestDelay(self.op.duration):
12905 raise errors.OpExecError("Error during master delay test")
12906 if self.op.on_nodes:
12907 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12908 for node, node_result in result.items():
12909 node_result.Raise("Failure during rpc call to node %s" % node)
12911 def Exec(self, feedback_fn):
12912 """Execute the test delay opcode, with the wanted repetitions.
12915 if self.op.repeat == 0:
12918 top_value = self.op.repeat - 1
12919 for i in range(self.op.repeat):
12920 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12924 class LUTestJqueue(NoHooksLU):
12925 """Utility LU to test some aspects of the job queue.
12930 # Must be lower than default timeout for WaitForJobChange to see whether it
12931 # notices changed jobs
12932 _CLIENT_CONNECT_TIMEOUT = 20.0
12933 _CLIENT_CONFIRM_TIMEOUT = 60.0
12936 def _NotifyUsingSocket(cls, cb, errcls):
12937 """Opens a Unix socket and waits for another program to connect.
12940 @param cb: Callback to send socket name to client
12941 @type errcls: class
12942 @param errcls: Exception class to use for errors
12945 # Using a temporary directory as there's no easy way to create temporary
12946 # sockets without writing a custom loop around tempfile.mktemp and
12947 # socket.bind
12948 tmpdir = tempfile.mkdtemp()
12950 tmpsock = utils.PathJoin(tmpdir, "sock")
12952 logging.debug("Creating temporary socket at %s", tmpsock)
12953 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12958 # Send details to client
12961 # Wait for client to connect before continuing
12962 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12964 (conn, _) = sock.accept()
12965 except socket.error, err:
12966 raise errcls("Client didn't connect in time (%s)" % err)
12970 # Remove as soon as client is connected
12971 shutil.rmtree(tmpdir)
12973 # Wait for client to close
12976 # pylint: disable=E1101
12977 # Instance of '_socketobject' has no ... member
12978 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12980 except socket.error, err:
12981 raise errcls("Client failed to confirm notification (%s)" % err)
12985 def _SendNotification(self, test, arg, sockname):
12986 """Sends a notification to the client.
12989 @param test: Test name
12990 @param arg: Test argument (depends on test)
12991 @type sockname: string
12992 @param sockname: Socket path
12995 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12997 def _Notify(self, prereq, test, arg):
12998 """Notifies the client of a test.
13001 @param prereq: Whether this is a prereq-phase test
13003 @param test: Test name
13004 @param arg: Test argument (depends on test)
13008 errcls = errors.OpPrereqError
13010 errcls = errors.OpExecError
13012 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13016 def CheckArguments(self):
13017 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13018 self.expandnames_calls = 0
13020 def ExpandNames(self):
13021 checkargs_calls = getattr(self, "checkargs_calls", 0)
13022 if checkargs_calls < 1:
13023 raise errors.ProgrammerError("CheckArguments was not called")
13025 self.expandnames_calls += 1
13027 if self.op.notify_waitlock:
13028 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13030 self.LogInfo("Expanding names")
13032 # Get lock on master node (just to get a lock, not for a particular reason)
13033 self.needed_locks = {
13034 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13037 def Exec(self, feedback_fn):
13038 if self.expandnames_calls < 1:
13039 raise errors.ProgrammerError("ExpandNames was not called")
13041 if self.op.notify_exec:
13042 self._Notify(False, constants.JQT_EXEC, None)
13044 self.LogInfo("Executing")
13046 if self.op.log_messages:
13047 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13048 for idx, msg in enumerate(self.op.log_messages):
13049 self.LogInfo("Sending log message %s", idx + 1)
13050 feedback_fn(constants.JQT_MSGPREFIX + msg)
13051 # Report how many test messages have been sent
13052 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13055 raise errors.OpExecError("Opcode failure was requested")
13060 class IAllocator(object):
13061 """IAllocator framework.
13063 An IAllocator instance has four sets of attributes:
13064 - cfg that is needed to query the cluster
13065 - input data (all keys declared for the mode in _MODE_DATA are required)
13066 - four buffer attributes (in_data, in_text, out_data, out_text), that
13067 represent the input (to the external script) in data structure and text
13068 format, and the output from it, again in both formats
13069 - the result variables from the script (success, info, result) for
13070 easy usage
13073 # pylint: disable=R0902
13074 # lots of instance attributes
13076 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13078 self.rpc = rpc_runner
13079 # init buffer variables
13080 self.in_text = self.out_text = self.in_data = self.out_data = None
13081 # init all input fields so that pylint is happy
13083 self.memory = self.disks = self.disk_template = None
13084 self.os = self.tags = self.nics = self.vcpus = None
13085 self.hypervisor = None
13086 self.relocate_from = None
13088 self.instances = None
13089 self.evac_mode = None
13090 self.target_groups = []
13092 self.required_nodes = None
13093 # init result fields
13094 self.success = self.info = self.result = None
13097 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13099 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13100 " IAllocator" % self.mode)
13102 keyset = [n for (n, _) in keydata]
13105 if key not in keyset:
13106 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13107 " IAllocator" % key)
13108 setattr(self, key, kwargs[key])
13111 if key not in kwargs:
13112 raise errors.ProgrammerError("Missing input parameter '%s' to"
13113 " IAllocator" % key)
13114 self._BuildInputData(compat.partial(fn, self), keydata)
13116 def _ComputeClusterData(self):
13117 """Compute the generic allocator input data.
13119 This is the data that is independent of the actual operation.
13123 cluster_info = cfg.GetClusterInfo()
13126 "version": constants.IALLOCATOR_VERSION,
13127 "cluster_name": cfg.GetClusterName(),
13128 "cluster_tags": list(cluster_info.GetTags()),
13129 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13130 # we don't have job IDs
13132 ninfo = cfg.GetAllNodesInfo()
13133 iinfo = cfg.GetAllInstancesInfo().values()
13134 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13137 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13139 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13140 hypervisor_name = self.hypervisor
13141 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13142 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13144 hypervisor_name = cluster_info.enabled_hypervisors[0]
13146 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13149 self.rpc.call_all_instances_info(node_list,
13150 cluster_info.enabled_hypervisors)
13152 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13154 config_ndata = self._ComputeBasicNodeData(ninfo)
13155 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13156 i_list, config_ndata)
13157 assert len(data["nodes"]) == len(ninfo), \
13158 "Incomplete node data computed"
13160 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13162 self.in_data = data
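# At this point self.in_data holds, roughly, the following structure, later
# serialized for the external allocator script:
#
#   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
#    "enabled_hypervisors": [...],
#    "nodegroups": {uuid: {"name": ..., "alloc_policy": ...}, ...},
#    "nodes": {name: {... static and dynamic node data ...}, ...},
#    "instances": {name: {... instance data ...}, ...}}
#
# The mode-specific "request" key is added separately in _BuildInputData.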
13165 def _ComputeNodeGroupData(cfg):
13166 """Compute node groups data.
13169 ng = dict((guuid, {
13170 "name": gdata.name,
13171 "alloc_policy": gdata.alloc_policy,
13173 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13178 def _ComputeBasicNodeData(node_cfg):
13179 """Compute global node data.
13182 @returns: a dict of name: (node dict, node config)
13185 # fill in static (config-based) values
13186 node_results = dict((ninfo.name, {
13187 "tags": list(ninfo.GetTags()),
13188 "primary_ip": ninfo.primary_ip,
13189 "secondary_ip": ninfo.secondary_ip,
13190 "offline": ninfo.offline,
13191 "drained": ninfo.drained,
13192 "master_candidate": ninfo.master_candidate,
13193 "group": ninfo.group,
13194 "master_capable": ninfo.master_capable,
13195 "vm_capable": ninfo.vm_capable,
13197 for ninfo in node_cfg.values())
13199 return node_results
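# A single entry in the returned dict is keyed by node name and, per the
# fields filled in above, looks approximately like (name and addresses are
# placeholders):
#
#   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.10",
#                         "secondary_ip": "192.0.2.10", "offline": False,
#                         "drained": False, "master_candidate": True,
#                         "group": "<group uuid>", "master_capable": True,
#                         "vm_capable": True}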
13202 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13204 """Compute global node data.
13206 @param node_results: the basic node structures as filled from the config
13209 # make a copy of the current dict
13210 node_results = dict(node_results)
13211 for nname, nresult in node_data.items():
13212 assert nname in node_results, "Missing basic data for node %s" % nname
13213 ninfo = node_cfg[nname]
13215 if not (ninfo.offline or ninfo.drained):
13216 nresult.Raise("Can't get data for node %s" % nname)
13217 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13219 remote_info = nresult.payload
13221 for attr in ["memory_total", "memory_free", "memory_dom0",
13222 "vg_size", "vg_free", "cpu_total"]:
13223 if attr not in remote_info:
13224 raise errors.OpExecError("Node '%s' didn't return attribute"
13225 " '%s'" % (nname, attr))
13226 if not isinstance(remote_info[attr], int):
13227 raise errors.OpExecError("Node '%s' returned invalid value"
13229 (nname, attr, remote_info[attr]))
13230 # compute memory used by primary instances
13231 i_p_mem = i_p_up_mem = 0
13232 for iinfo, beinfo in i_list:
13233 if iinfo.primary_node == nname:
13234 i_p_mem += beinfo[constants.BE_MEMORY]
13235 if iinfo.name not in node_iinfo[nname].payload:
13238 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13239 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13240 remote_info["memory_free"] -= max(0, i_mem_diff)
13243 i_p_up_mem += beinfo[constants.BE_MEMORY]
13245 # compute memory used by instances
13247 "total_memory": remote_info["memory_total"],
13248 "reserved_memory": remote_info["memory_dom0"],
13249 "free_memory": remote_info["memory_free"],
13250 "total_disk": remote_info["vg_size"],
13251 "free_disk": remote_info["vg_free"],
13252 "total_cpus": remote_info["cpu_total"],
13253 "i_pri_memory": i_p_mem,
13254 "i_pri_up_memory": i_p_up_mem,
13256 pnr_dyn.update(node_results[nname])
13257 node_results[nname] = pnr_dyn
13259 return node_results
13262 def _ComputeInstanceData(cluster_info, i_list):
13263 """Compute global instance data.
13267 for iinfo, beinfo in i_list:
13269 for nic in iinfo.nics:
13270 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13274 "mode": filled_params[constants.NIC_MODE],
13275 "link": filled_params[constants.NIC_LINK],
13277 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13278 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13279 nic_data.append(nic_dict)
13281 "tags": list(iinfo.GetTags()),
13282 "admin_up": iinfo.admin_up,
13283 "vcpus": beinfo[constants.BE_VCPUS],
13284 "memory": beinfo[constants.BE_MEMORY],
13286 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13288 "disks": [{constants.IDISK_SIZE: dsk.size,
13289 constants.IDISK_MODE: dsk.mode}
13290 for dsk in iinfo.disks],
13291 "disk_template": iinfo.disk_template,
13292 "hypervisor": iinfo.hypervisor,
13294 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13296 instance_data[iinfo.name] = pir
13298 return instance_data
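# Each instance entry assembled above is keyed by instance name and carries,
# among other fields, "tags", "admin_up", "vcpus", "memory", "nics" (with
# "mode"/"link" and, for bridged NICs, a legacy "bridge" key), "nodes"
# (primary node first, then secondaries), "disks", "disk_template",
# "hypervisor" and the derived "disk_space_total".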
13300 def _AddNewInstance(self):
13301 """Add new instance data to allocator structure.
13303 This in combination with _ComputeClusterData will create the
13304 correct structure needed as input for the allocator.
13306 The checks for the completeness of the opcode must have already been
13310 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13312 if self.disk_template in constants.DTS_INT_MIRROR:
13313 self.required_nodes = 2
13315 self.required_nodes = 1
13319 "disk_template": self.disk_template,
13322 "vcpus": self.vcpus,
13323 "memory": self.memory,
13324 "disks": self.disks,
13325 "disk_space_total": disk_space,
13327 "required_nodes": self.required_nodes,
13328 "hypervisor": self.hypervisor,
13333 def _AddRelocateInstance(self):
13334 """Add relocate instance data to allocator structure.
13336 This in combination with _ComputeClusterData will create the
13337 correct structure needed as input for the allocator.
13339 The checks for the completeness of the opcode must have already been
13343 instance = self.cfg.GetInstanceInfo(self.name)
13344 if instance is None:
13345 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13346 " IAllocator" % self.name)
13348 if instance.disk_template not in constants.DTS_MIRRORED:
13349 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13350 errors.ECODE_INVAL)
13352 if instance.disk_template in constants.DTS_INT_MIRROR and \
13353 len(instance.secondary_nodes) != 1:
13354 raise errors.OpPrereqError("Instance has not exactly one secondary node",
13355 errors.ECODE_STATE)
13357 self.required_nodes = 1
13358 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13359 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13363 "disk_space_total": disk_space,
13364 "required_nodes": self.required_nodes,
13365 "relocate_from": self.relocate_from,
13369 def _AddNodeEvacuate(self):
13370 """Get data for node-evacuate requests.
13374 "instances": self.instances,
13375 "evac_mode": self.evac_mode,
13378 def _AddChangeGroup(self):
13379 """Get data for node-evacuate requests.
13383 "instances": self.instances,
13384 "target_groups": self.target_groups,
13387 def _BuildInputData(self, fn, keydata):
13388 """Build input data structures.
13391 self._ComputeClusterData()
13394 request["type"] = self.mode
13395 for keyname, keytype in keydata:
13396 if keyname not in request:
13397 raise errors.ProgrammerError("Request parameter %s is missing" %
13399 val = request[keyname]
13400 if not keytype(val):
13401 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13402 " validation, value %s, expected"
13403 " type %s" % (keyname, val, keytype))
13404 self.in_data["request"] = request
13406 self.in_text = serializer.Dump(self.in_data)
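# The per-mode request built by fn() is merged under the "request" key and
# checked against the declared keydata before serialization. For the
# relocation mode, for instance, the serialized request part would look
# roughly like this (instance and node names purely illustrative):
#
#   "request": {"type": "relocate", "name": "inst1.example.com",
#               "relocate_from": ["node2.example.com"],
#               "disk_space_total": 1024, "required_nodes": 1}
#
# Every keytype in keydata is an ht.T* validator; a value failing it aborts
# with a ProgrammerError instead of being passed on to the script.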
13408 _STRING_LIST = ht.TListOf(ht.TString)
13409 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13410 # pylint: disable=E1101
13411 # Class '...' has no 'OP_ID' member
13412 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13413 opcodes.OpInstanceMigrate.OP_ID,
13414 opcodes.OpInstanceReplaceDisks.OP_ID])
13418 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13419 ht.TItems([ht.TNonEmptyString,
13420 ht.TNonEmptyString,
13421 ht.TListOf(ht.TNonEmptyString),
13424 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13425 ht.TItems([ht.TNonEmptyString,
13428 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13429 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13432 constants.IALLOCATOR_MODE_ALLOC:
13435 ("name", ht.TString),
13436 ("memory", ht.TInt),
13437 ("disks", ht.TListOf(ht.TDict)),
13438 ("disk_template", ht.TString),
13439 ("os", ht.TString),
13440 ("tags", _STRING_LIST),
13441 ("nics", ht.TListOf(ht.TDict)),
13442 ("vcpus", ht.TInt),
13443 ("hypervisor", ht.TString),
13445 constants.IALLOCATOR_MODE_RELOC:
13446 (_AddRelocateInstance,
13447 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13449 constants.IALLOCATOR_MODE_NODE_EVAC:
13450 (_AddNodeEvacuate, [
13451 ("instances", _STRING_LIST),
13452 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13454 constants.IALLOCATOR_MODE_CHG_GROUP:
13455 (_AddChangeGroup, [
13456 ("instances", _STRING_LIST),
13457 ("target_groups", _STRING_LIST),
13461 def Run(self, name, validate=True, call_fn=None):
13462 """Run an instance allocator and return the results.
13465 if call_fn is None:
13466 call_fn = self.rpc.call_iallocator_runner
13468 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13469 result.Raise("Failure while running the iallocator script")
13471 self.out_text = result.payload
13473 self._ValidateResult()
13475 def _ValidateResult(self):
13476 """Process the allocator results.
13478 This parses and checks the allocator's output and, if it is valid, saves
13479 it in self.out_data and in the result attributes (success, info, result).
13483 rdict = serializer.Load(self.out_text)
13484 except Exception, err:
13485 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13487 if not isinstance(rdict, dict):
13488 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13490 # TODO: remove backwards compatibility in later versions
13491 if "nodes" in rdict and "result" not in rdict:
13492 rdict["result"] = rdict["nodes"]
13495 for key in "success", "info", "result":
13496 if key not in rdict:
13497 raise errors.OpExecError("Can't parse iallocator results:"
13498 " missing key '%s'" % key)
13499 setattr(self, key, rdict[key])
13501 if not self._result_check(self.result):
13502 raise errors.OpExecError("Iallocator returned invalid result,"
13503 " expected %s, got %s" %
13504 (self._result_check, self.result),
13505 errors.ECODE_INVAL)
13507 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13508 assert self.relocate_from is not None
13509 assert self.required_nodes == 1
13511 node2group = dict((name, ndata["group"])
13512 for (name, ndata) in self.in_data["nodes"].items())
13514 fn = compat.partial(self._NodesToGroups, node2group,
13515 self.in_data["nodegroups"])
13517 instance = self.cfg.GetInstanceInfo(self.name)
13518 request_groups = fn(self.relocate_from + [instance.primary_node])
13519 result_groups = fn(rdict["result"] + [instance.primary_node])
13521 if self.success and not set(result_groups).issubset(request_groups):
13522 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13523 " differ from original groups (%s)" %
13524 (utils.CommaJoin(result_groups),
13525 utils.CommaJoin(request_groups)))
13527 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13528 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13530 self.out_data = rdict
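# A minimal well-formed reply, as the JSON an allocator script would emit
# and as accepted by the checks above, would be something like:
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# Older scripts returning "nodes" instead of "result" are still accepted via
# the backwards-compatibility shim above, and "result" must additionally
# pass the mode-specific self._result_check validator.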
13533 def _NodesToGroups(node2group, groups, nodes):
13534 """Returns a list of unique group names for a list of nodes.
13536 @type node2group: dict
13537 @param node2group: Map from node name to group UUID
13539 @param groups: Group information
13541 @param nodes: Node names
13548 group_uuid = node2group[node]
13550 # Ignore unknown node
13554 group = groups[group_uuid]
13556 # Can't find group, let's use UUID
13557 group_name = group_uuid
13559 group_name = group["name"]
13561 result.add(group_name)
13563 return sorted(result)
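# For example (hypothetical mapping), with
#   node2group = {"node1": "uuid-a", "node2": "uuid-b", "nodeX": "uuid-gone"}
#   groups     = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
# the call _NodesToGroups(node2group, groups, ["node1", "node2", "unknown",
# "nodeX"]) returns ["default", "rack2", "uuid-gone"]: nodes missing from the
# map are skipped, and groups missing from the group dict fall back to their
# UUID.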
13566 class LUTestAllocator(NoHooksLU):
13567 """Run allocator tests.
13569 This LU runs the allocator tests.
13572 def CheckPrereq(self):
13573 """Check prerequisites.
13575 This checks the opcode parameters depending on the direction and mode of the test.
13578 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13579 for attr in ["memory", "disks", "disk_template",
13580 "os", "tags", "nics", "vcpus"]:
13581 if not hasattr(self.op, attr):
13582 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13583 attr, errors.ECODE_INVAL)
13584 iname = self.cfg.ExpandInstanceName(self.op.name)
13585 if iname is not None:
13586 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13587 iname, errors.ECODE_EXISTS)
13588 if not isinstance(self.op.nics, list):
13589 raise errors.OpPrereqError("Invalid parameter 'nics'",
13590 errors.ECODE_INVAL)
13591 if not isinstance(self.op.disks, list):
13592 raise errors.OpPrereqError("Invalid parameter 'disks'",
13593 errors.ECODE_INVAL)
13594 for row in self.op.disks:
13595 if (not isinstance(row, dict) or
13596 constants.IDISK_SIZE not in row or
13597 not isinstance(row[constants.IDISK_SIZE], int) or
13598 constants.IDISK_MODE not in row or
13599 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13600 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13601 " parameter", errors.ECODE_INVAL)
13602 if self.op.hypervisor is None:
13603 self.op.hypervisor = self.cfg.GetHypervisorType()
13604 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13605 fname = _ExpandInstanceName(self.cfg, self.op.name)
13606 self.op.name = fname
13607 self.relocate_from = \
13608 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13609 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13610 constants.IALLOCATOR_MODE_NODE_EVAC):
13611 if not self.op.instances:
13612 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13613 self.op.instances = _GetWantedInstances(self, self.op.instances)
13615 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13616 self.op.mode, errors.ECODE_INVAL)
13618 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13619 if self.op.allocator is None:
13620 raise errors.OpPrereqError("Missing allocator name",
13621 errors.ECODE_INVAL)
13622 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13623 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13624 self.op.direction, errors.ECODE_INVAL)
13626 def Exec(self, feedback_fn):
13627 """Run the allocator test.
13630 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13631 ial = IAllocator(self.cfg, self.rpc,
13634 memory=self.op.memory,
13635 disks=self.op.disks,
13636 disk_template=self.op.disk_template,
13640 vcpus=self.op.vcpus,
13641 hypervisor=self.op.hypervisor,
13643 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13644 ial = IAllocator(self.cfg, self.rpc,
13647 relocate_from=list(self.relocate_from),
13649 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13650 ial = IAllocator(self.cfg, self.rpc,
13652 instances=self.op.instances,
13653 target_groups=self.op.target_groups)
13654 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13655 ial = IAllocator(self.cfg, self.rpc,
13657 instances=self.op.instances,
13658 evac_mode=self.op.evac_mode)
13660 raise errors.ProgrammerError("Uncatched mode %s in"
13661 " LUTestAllocator.Exec", self.op.mode)
13663 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13664 result = ial.in_text
13666 ial.Run(self.op.allocator, validate=False)
13667 result = ial.out_text
13671 #: Query type implementations
13673 constants.QR_INSTANCE: _InstanceQuery,
13674 constants.QR_NODE: _NodeQuery,
13675 constants.QR_GROUP: _GroupQuery,
13676 constants.QR_OS: _OsQuery,
13679 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13682 def _GetQueryImplementation(name):
13683 """Returns the implemtnation for a query type.
13685 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13689 return _QUERY_IMPL[name]
13691 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13692 errors.ECODE_INVAL)
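# A query resource is resolved to its implementation roughly like this
# (sketch, using only the names defined above):
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   impl = _GetQueryImplementation("no-such-resource")  # raises OpPrereqError
#
# i.e. only the resources listed in _QUERY_IMPL (and therefore, per the
# assertion above, in constants.QR_VIA_OP) are valid.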