4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module

import logging
import copy
import re
import itertools

import OpenSSL
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
72 """Data container for LU results with jobs.
74 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
contained in the C{jobs} attribute and include the job IDs in the opcode result.
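A minimal usage sketch (illustrative only; C{other_field} stands for any
extra keyword return value)::

  def Exec(self, feedback_fn):
    # ... do the LU's own work, then hand follow-up jobs to the processor
    return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                          other_field="some value")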
80 def __init__(self, jobs, **kwargs):
81 """Initializes this class.
83 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
86 @param jobs: A list of lists of opcode objects
93 class LogicalUnit(object):
94 """Logical Unit base class.
96 Subclasses must follow these rules:
97 - implement ExpandNames
98 - implement CheckPrereq (except when tasklets are used)
99 - implement Exec (except when tasklets are used)
100 - implement BuildHooksEnv
101 - implement BuildHooksNodes
102 - redefine HPATH and HTYPE
103 - optionally redefine their run requirements:
104 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
106 Note that all commands require root permissions.
108 @ivar dry_run_result: the value (if any) that will be returned to the caller
109 in dry-run mode (signalled by opcode dry_run parameter)
116 def __init__(self, processor, op, context, rpc_runner):
117 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op validity.
123 self.proc = processor
125 self.cfg = context.cfg
126 self.glm = context.glm
128 self.owned_locks = context.glm.list_owned
129 self.context = context
130 self.rpc = rpc_runner
131 # Dicts used to declare locking needs to mcpu
132 self.needed_locks = None
133 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
135 self.remove_locks = {}
136 # Used to force good behavior when calling helper functions
137 self.recalculate_locks = {}
139 self.Log = processor.Log # pylint: disable=C0103
140 self.LogWarning = processor.LogWarning # pylint: disable=C0103
141 self.LogInfo = processor.LogInfo # pylint: disable=C0103
142 self.LogStep = processor.LogStep # pylint: disable=C0103
143 # support for dry-run
144 self.dry_run_result = None
145 # support for generic debug attribute
146 if (not hasattr(self.op, "debug_level") or
147 not isinstance(self.op.debug_level, int)):
148 self.op.debug_level = 0
153 # Validate opcode parameters and set defaults
154 self.op.Validate(True)
156 self.CheckArguments()
158 def CheckArguments(self):
159 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring
162 validity of opcode parameters, without any cluster-related
163 checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as purely a lock-related function
167 - CheckPrereq is run after we have acquired locks (and possible
170 The function is allowed to change the self.op attribute so that
later methods need no longer worry about missing parameters.
176 def ExpandNames(self):
177 """Expand names for this LU.
179 This method is called before starting to execute the opcode, and it should
180 update all the parameters of the opcode to their canonical form (e.g. a
181 short node name must be fully expanded after this method has successfully
182 completed). This way locking, hooks, logging, etc. can work correctly.
184 LUs which implement this method must also populate the self.needed_locks
185 member, as a dict with lock levels as keys, and a list of needed lock names
188 - use an empty dict if you don't need any lock
189 - if you don't need any lock at a particular level omit that level
190 - don't put anything for the BGL level
191 - if you want all locks at a level use locking.ALL_SET as a value
193 If you need to share locks (rather than acquire them exclusively) at one
194 level you can modify self.share_locks, setting a true value (usually 1) for
195 that level. By default locks are not shared.
197 This function can also define a list of tasklets, which then will be
198 executed in order instead of the usual LU-level CheckPrereq and Exec
199 functions, if those are not defined by the LU.
203 # Acquire all nodes and one instance
204 self.needed_locks = {
205 locking.LEVEL_NODE: locking.ALL_SET,
locking.LEVEL_INSTANCE: ['instance1.example.com'],
}
208 # Acquire just two nodes
209 self.needed_locks = {
locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
}
213 self.needed_locks = {} # No, you can't leave it to the default value None
216 # The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same time.
220 self.needed_locks = {} # Exclusive LUs don't need locks.
222 raise NotImplementedError
224 def DeclareLocks(self, level):
225 """Declare LU locking needs for a level
227 While most LUs can just declare their locking needs at ExpandNames time,
228 sometimes there's the need to calculate some locks after having acquired
229 the ones before. This function is called just before acquiring locks at a
230 particular level, but after acquiring the ones at lower levels, and permits
231 such calculations. It can be used to modify self.needed_locks, and by
232 default it does nothing.
234 This function is only called if you have something already set in
235 self.needed_locks for the level.
237 @param level: Locking level which is going to be locked
238 @type level: member of ganeti.locking.LEVELS
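A typical implementation defers to a helper for the node level, mirroring
the _LockInstancesNodes example below (sketch)::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()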
242 def CheckPrereq(self):
243 """Check prerequisites for this LU.
245 This method should check that the prerequisites for the execution
246 of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are allowed.
250 The method should raise errors.OpPrereqError in case something is
251 not fulfilled. Its return value is ignored.
253 This method should also update all the parameters of the opcode to
254 their canonical form if it hasn't been done by ExpandNames before.
257 if self.tasklets is not None:
258 for (idx, tl) in enumerate(self.tasklets):
259 logging.debug("Checking prerequisites for tasklet %s/%s",
idx + 1, len(self.tasklets))
tl.CheckPrereq()
265 def Exec(self, feedback_fn):
"""Execute the LU.

This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in code, or
expected.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
tl.Exec(feedback_fn)
278 raise NotImplementedError
280 def BuildHooksEnv(self):
281 """Build hooks environment for this LU.
284 @return: Dictionary containing the environment that will be used for
285 running the hooks for this LU. The keys of the dict must not be prefixed
286 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287 will extend the environment with additional variables. If no environment
288 should be defined, an empty dictionary should be returned (not C{None}).
@note: If the C{HPATH} attribute of the LU class is C{None}, this function
will not be called.
293 raise NotImplementedError
295 def BuildHooksNodes(self):
296 """Build list of nodes to run LU's hooks.
298 @rtype: tuple; (list, list)
299 @return: Tuple containing a list of node names on which the hook
300 should run before the execution and a list of node names on which the
hook should run after the execution. If no nodes are needed, an empty list
should be returned (not C{None}).
@note: If the C{HPATH} attribute of the LU class is C{None}, this function
will not be called.
307 raise NotImplementedError
309 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310 """Notify the LU about the results of its hooks.
312 This method is called every time a hooks phase is executed, and notifies
313 the Logical Unit about the hooks' result. The LU can then use it to alter
314 its result based on the hooks. By default the method does nothing and the
315 previous result is passed back unchanged but any LU can define it if it
316 wants to use the local cluster hook-scripts somehow.
318 @param phase: one of L{constants.HOOKS_PHASE_POST} or
319 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
322 @param lu_result: the previous Exec result this LU had, or None
324 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# could-be-a-function warnings
# pylint: disable=W0613,R0201
return lu_result
333 def _ExpandAndLockInstance(self):
334 """Helper function to expand and lock an instance.
336 Many LUs that work on an instance take its name in self.op.instance_name
337 and need to expand it and then declare the expanded name for locking. This
338 function does it, and then updates self.op.instance_name to the expanded
339 name. It also initializes needed_locks as a dict, if this hasn't been done
343 if self.needed_locks is None:
344 self.needed_locks = {}
346 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347 "_ExpandAndLockInstance called with instance-level locks set"
348 self.op.instance_name = _ExpandInstanceName(self.cfg,
349 self.op.instance_name)
350 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
352 def _LockInstancesNodes(self, primary_only=False):
353 """Helper function to declare instances' nodes for locking.
355 This function should be called after locking one or more instances to lock
356 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
357 with all primary or secondary nodes for instances already locked and
358 present in self.needed_locks[locking.LEVEL_INSTANCE].
360 It should be called from DeclareLocks, and for safety only works if
361 self.recalculate_locks[locking.LEVEL_NODE] is set.
363 In the future it may grow parameters to just lock some instance's nodes, or
364 to just lock primaries or secondary nodes, if needed.
It should be called from DeclareLocks in a way similar to::
368 if level == locking.LEVEL_NODE:
369 self._LockInstancesNodes()
371 @type primary_only: boolean
372 @param primary_only: only lock primary nodes of locked instances
375 assert locking.LEVEL_NODE in self.recalculate_locks, \
376 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
380 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
381 # future we might want to have different behaviors depending on the value
382 # of self.recalculate_locks[locking.LEVEL_NODE]
wanted_nodes = []
locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
  wanted_nodes.append(instance.primary_node)
  if not primary_only:
    wanted_nodes.extend(instance.secondary_nodes)
390 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
391 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
392 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
393 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
395 del self.recalculate_locks[locking.LEVEL_NODE]
398 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
399 """Simple LU which runs no hooks.
401 This LU is intended as a parent for other LogicalUnits which will
402 run no hooks, in order to reduce duplicate code.
408 def BuildHooksEnv(self):
409 """Empty BuildHooksEnv for NoHooksLu.
411 This just raises an error.
414 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
416 def BuildHooksNodes(self):
417 """Empty BuildHooksNodes for NoHooksLU.
420 raise AssertionError("BuildHooksNodes called for NoHooksLU")
424 """Tasklet base class.
426 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
427 they can mix legacy code with tasklets. Locking needs to be done in the LU,
428 tasklets know nothing about locks.
430 Subclasses must follow these rules:
431 - Implement CheckPrereq
435 def __init__(self, lu):
442 def CheckPrereq(self):
443 """Check prerequisites for this tasklets.
445 This method should check whether the prerequisites for the execution of
446 this tasklet are fulfilled. It can do internode communication, but it
447 should be idempotent - no cluster or system changes are allowed.
449 The method should raise errors.OpPrereqError in case something is not
450 fulfilled. Its return value is ignored.
452 This method should also update all parameters to their canonical form if it
453 hasn't been done before.
458 def Exec(self, feedback_fn):
459 """Execute the tasklet.
461 This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in code, or expected.
466 raise NotImplementedError
470 """Base for query utility classes.
473 #: Attribute holding field definitions
476 def __init__(self, qfilter, fields, use_locking):
477 """Initializes this class.
480 self.use_locking = use_locking
482 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
484 self.requested_data = self.query.RequestedData()
485 self.names = self.query.RequestedNames()
487 # Sort only if no names were requested
488 self.sort_by_name = not self.names
490 self.do_locking = None
493 def _GetNames(self, lu, all_names, lock_level):
494 """Helper function to determine names asked for in the query.
498 names = lu.owned_locks(lock_level)
502 if self.wanted == locking.ALL_SET:
503 assert not self.names
504 # caller didn't specify names, so ordering is not important
505 return utils.NiceSort(names)
507 # caller specified names and we must keep the same order
509 assert not self.do_locking or lu.glm.is_owned(lock_level)
missing = set(self.wanted).difference(names)
if missing:
  raise errors.OpExecError("Some items were removed before retrieving"
                           " their data: %s" % missing)
516 # Return expanded names
519 def ExpandNames(self, lu):
520 """Expand names for this query.
522 See L{LogicalUnit.ExpandNames}.
525 raise NotImplementedError()
527 def DeclareLocks(self, lu, level):
528 """Declare locks for this query.
530 See L{LogicalUnit.DeclareLocks}.
533 raise NotImplementedError()
535 def _GetQueryData(self, lu):
536 """Collects all data for this query.
538 @return: Query data object
541 raise NotImplementedError()
543 def NewStyleQuery(self, lu):
544 """Collect data and execute query.
547 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
548 sort_by_name=self.sort_by_name)
550 def OldStyleQuery(self, lu):
551 """Collect data and execute query.
554 return self.query.OldStyleQuery(self._GetQueryData(lu),
555 sort_by_name=self.sort_by_name)
559 """Returns a dict declaring all lock levels shared.
562 return dict.fromkeys(locking.LEVELS, 1)
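# Usage sketch: read-mostly LUs typically declare every level shared in
# ExpandNames (as LUClusterVerifyGroup does below):
#   self.share_locks = _ShareAll()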
565 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
566 """Checks if the owned node groups are still correct for an instance.
568 @type cfg: L{config.ConfigWriter}
569 @param cfg: The cluster configuration
570 @type instance_name: string
571 @param instance_name: Instance name
572 @type owned_groups: set or frozenset
573 @param owned_groups: List of currently owned node groups
576 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
578 if not owned_groups.issuperset(inst_groups):
579 raise errors.OpPrereqError("Instance %s's node groups changed since"
580 " locks were acquired, current groups are"
581 " are '%s', owning groups '%s'; retry the"
584 utils.CommaJoin(inst_groups),
585 utils.CommaJoin(owned_groups)),
591 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
592 """Checks if the instances in a node group are still correct.
594 @type cfg: L{config.ConfigWriter}
595 @param cfg: The cluster configuration
596 @type group_uuid: string
597 @param group_uuid: Node group UUID
598 @type owned_instances: set or frozenset
599 @param owned_instances: List of currently owned instances
602 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
603 if owned_instances != wanted_instances:
604 raise errors.OpPrereqError("Instances in node group '%s' changed since"
605 " locks were acquired, wanted '%s', have '%s';"
606 " retry the operation" %
608 utils.CommaJoin(wanted_instances),
609 utils.CommaJoin(owned_instances)),
612 return wanted_instances
615 def _SupportsOob(cfg, node):
616 """Tells if node supports OOB.
618 @type cfg: L{config.ConfigWriter}
619 @param cfg: The cluster configuration
620 @type node: L{objects.Node}
621 @param node: The node
622 @return: The OOB script if supported or an empty string otherwise
625 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
628 def _GetWantedNodes(lu, nodes):
629 """Returns list of checked and expanded node names.
631 @type lu: L{LogicalUnit}
632 @param lu: the logical unit on whose behalf we execute
634 @param nodes: list of node names or None for all nodes
636 @return: the list of nodes, sorted
637 @raise errors.ProgrammerError: if the nodes parameter is wrong type
if nodes:
  return [_ExpandNodeName(lu.cfg, name) for name in nodes]
return utils.NiceSort(lu.cfg.GetNodeList())
646 def _GetWantedInstances(lu, instances):
647 """Returns list of checked and expanded instance names.
649 @type lu: L{LogicalUnit}
650 @param lu: the logical unit on whose behalf we execute
651 @type instances: list
652 @param instances: list of instance names or None for all instances
654 @return: the list of instances, sorted
655 @raise errors.OpPrereqError: if the instances parameter is wrong type
656 @raise errors.OpPrereqError: if any of the passed instances is not found
if instances:
  wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
else:
  wanted = utils.NiceSort(lu.cfg.GetInstanceList())
return wanted
666 def _GetUpdatedParams(old_params, update_dict,
667 use_default=True, use_none=False):
668 """Return the new version of a parameter dictionary.
670 @type old_params: dict
671 @param old_params: old parameters
672 @type update_dict: dict
673 @param update_dict: dict containing new parameter values, or
674 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
    values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
    deleted' values
683 @return: the new parameter dictionary
686 params_copy = copy.deepcopy(old_params)
for key, val in update_dict.iteritems():
  if ((use_default and val == constants.VALUE_DEFAULT) or
      (use_none and val is None)):
    try:
      del params_copy[key]
    except KeyError:
      pass
  else:
    params_copy[key] = val
return params_copy
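# Example (sketch): with the defaults (use_default=True, use_none=False)
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "disk": 10})
# returns {"vcpus": 2, "disk": 10}: "mem" is removed (reset to its default),
# "disk" is added and "vcpus" is kept unchanged.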
699 def _ReleaseLocks(lu, level, names=None, keep=None):
700 """Releases locks owned by an LU.
702 @type lu: L{LogicalUnit}
703 @param level: Lock level
704 @type names: list or None
705 @param names: Names of locks to release
706 @type keep: list or None
707 @param keep: Names of locks to retain
710 assert not (keep is not None and names is not None), \
711 "Only one of the 'names' and the 'keep' parameters can be given"
if names is not None:
  should_release = names.__contains__
elif keep is not None:
  should_release = lambda name: name not in keep
else:
  should_release = None
if should_release is not None:
  retain = []
  release = []
  # Determine which locks to release
  for name in lu.owned_locks(level):
    if should_release(name):
      release.append(name)
    else:
      retain.append(name)
  assert len(lu.owned_locks(level)) == (len(retain) + len(release))
  # Release just some locks
  lu.glm.release(level, names=release)
  assert frozenset(lu.owned_locks(level)) == frozenset(retain)
else:
  # Release everything
  lu.glm.release(level)
  assert not lu.glm.is_owned(level), "No locks should be owned"
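# Usage sketch (names illustrative): an LU that has narrowed its work down to
# a single node can drop every other node lock with
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# or release an explicit subset by passing the "names" argument instead.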
744 def _MapInstanceDisksToNodes(instances):
745 """Creates a map from (node, volume) to instance name.
747 @type instances: list of L{objects.Instance}
748 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
return dict(((node, vol), inst.name)
            for inst in instances
            for (node, vols) in inst.MapLVsByNode().items()
            for vol in vols)
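# Example result (sketch, illustrative names):
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com", ...}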
757 def _RunPostHook(lu, node_name):
758 """Runs the post-hook for an opcode on a single node.
761 hm = lu.proc.BuildHooksManager(lu)
763 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
765 # pylint: disable=W0702
766 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
769 def _CheckOutputFields(static, dynamic, selected):
770 """Checks whether all selected fields are valid.
772 @type static: L{utils.FieldSet}
773 @param static: static fields set
774 @type dynamic: L{utils.FieldSet}
775 @param dynamic: dynamic fields set
782 delta = f.NonMatching(selected)
784 raise errors.OpPrereqError("Unknown output fields selected: %s"
785 % ",".join(delta), errors.ECODE_INVAL)
788 def _CheckGlobalHvParams(params):
789 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of global parameters.
795 used_globals = constants.HVC_GLOBALS.intersection(params)
if used_globals:
  msg = ("The following hypervisor parameters are global and cannot"
798 " be customized at instance level, please modify them at"
799 " cluster level: %s" % utils.CommaJoin(used_globals))
800 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
803 def _CheckNodeOnline(lu, node, msg=None):
804 """Ensure that a given node is online.
806 @param lu: the LU on behalf of which we make the check
807 @param node: the node to check
808 @param msg: if passed, should be a message to replace the default one
809 @raise errors.OpPrereqError: if the node is offline
813 msg = "Can't use offline node"
814 if lu.cfg.GetNodeInfo(node).offline:
815 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
818 def _CheckNodeNotDrained(lu, node):
819 """Ensure that a given node is not drained.
821 @param lu: the LU on behalf of which we make the check
822 @param node: the node to check
823 @raise errors.OpPrereqError: if the node is drained
826 if lu.cfg.GetNodeInfo(node).drained:
827 raise errors.OpPrereqError("Can't use drained node %s" % node,
831 def _CheckNodeVmCapable(lu, node):
832 """Ensure that a given node is vm capable.
834 @param lu: the LU on behalf of which we make the check
835 @param node: the node to check
836 @raise errors.OpPrereqError: if the node is not vm capable
839 if not lu.cfg.GetNodeInfo(node).vm_capable:
840 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
844 def _CheckNodeHasOS(lu, node, os_name, force_variant):
845 """Ensure that a node supports a given OS.
847 @param lu: the LU on behalf of which we make the check
848 @param node: the node to check
849 @param os_name: the OS to query about
850 @param force_variant: whether to ignore variant errors
851 @raise errors.OpPrereqError: if the node is not supporting the OS
854 result = lu.rpc.call_os_get(node, os_name)
855 result.Raise("OS '%s' not in supported OS list for node %s" %
(os_name, node), prereq=True, ecode=errors.ECODE_INVAL)
858 if not force_variant:
859 _CheckOSVariant(result.payload, os_name)
862 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
863 """Ensure that a node has the given secondary ip.
865 @type lu: L{LogicalUnit}
866 @param lu: the LU on behalf of which we make the check
868 @param node: the node to check
869 @type secondary_ip: string
870 @param secondary_ip: the ip to check
871 @type prereq: boolean
872 @param prereq: whether to throw a prerequisite or an execute error
873 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
874 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
877 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
878 result.Raise("Failure checking secondary ip on node %s" % node,
879 prereq=prereq, ecode=errors.ECODE_ENVIRON)
880 if not result.payload:
881 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
882 " please fix and re-run this command" % secondary_ip)
if prereq:
  raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
else:
  raise errors.OpExecError(msg)
889 def _GetClusterDomainSecret():
890 """Reads the cluster domain secret.
893 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
897 def _CheckInstanceDown(lu, instance, reason):
898 """Ensure that an instance is not running."""
899 if instance.admin_up:
900 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
901 (instance.name, reason), errors.ECODE_STATE)
903 pnode = instance.primary_node
904 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
905 ins_l.Raise("Can't contact node %s for instance information" % pnode,
906 prereq=True, ecode=errors.ECODE_ENVIRON)
908 if instance.name in ins_l.payload:
909 raise errors.OpPrereqError("Instance %s is running, %s" %
910 (instance.name, reason), errors.ECODE_STATE)
913 def _ExpandItemName(fn, name, kind):
914 """Expand an item name.
916 @param fn: the function to use for expansion
917 @param name: requested item name
918 @param kind: text description ('Node' or 'Instance')
919 @return: the resolved (full) name
920 @raise errors.OpPrereqError: if the item is not found
full_name = fn(name)
if full_name is None:
raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                           errors.ECODE_NOENT)
return full_name
930 def _ExpandNodeName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for nodes."""
932 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
935 def _ExpandInstanceName(cfg, name):
936 """Wrapper over L{_ExpandItemName} for instance."""
937 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
940 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
941 memory, vcpus, nics, disk_template, disks,
942 bep, hvp, hypervisor_name, tags):
943 """Builds instance related env variables for hooks
945 This builds the hook environment from individual variables.
948 @param name: the name of the instance
949 @type primary_node: string
950 @param primary_node: the name of the instance's primary node
951 @type secondary_nodes: list
952 @param secondary_nodes: list of secondary nodes as strings
953 @type os_type: string
954 @param os_type: the name of the instance's OS
955 @type status: boolean
956 @param status: the should_run status of the instance
958 @param memory: the memory size of the instance
960 @param vcpus: the count of VCPUs the instance has
962 @param nics: list of tuples (ip, mac, mode, link) representing
963 the NICs the instance has
964 @type disk_template: string
965 @param disk_template: the disk template of the instance
967 @param disks: the list of (size, mode) pairs
969 @param bep: the backend parameters for the instance
971 @param hvp: the hypervisor parameters for the instance
972 @type hypervisor_name: string
973 @param hypervisor_name: the hypervisor for the instance
975 @param tags: list of instance tags as strings
977 @return: the hook environment for this instance
986 "INSTANCE_NAME": name,
987 "INSTANCE_PRIMARY": primary_node,
988 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
989 "INSTANCE_OS_TYPE": os_type,
990 "INSTANCE_STATUS": str_status,
991 "INSTANCE_MEMORY": memory,
992 "INSTANCE_VCPUS": vcpus,
993 "INSTANCE_DISK_TEMPLATE": disk_template,
994 "INSTANCE_HYPERVISOR": hypervisor_name,
998 nic_count = len(nics)
999 for idx, (ip, mac, mode, link) in enumerate(nics):
1002 env["INSTANCE_NIC%d_IP" % idx] = ip
1003 env["INSTANCE_NIC%d_MAC" % idx] = mac
1004 env["INSTANCE_NIC%d_MODE" % idx] = mode
1005 env["INSTANCE_NIC%d_LINK" % idx] = link
1006 if mode == constants.NIC_MODE_BRIDGED:
1007 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1011 env["INSTANCE_NIC_COUNT"] = nic_count
1014 disk_count = len(disks)
1015 for idx, (size, mode) in enumerate(disks):
1016 env["INSTANCE_DISK%d_SIZE" % idx] = size
1017 env["INSTANCE_DISK%d_MODE" % idx] = mode
1021 env["INSTANCE_DISK_COUNT"] = disk_count
1026 env["INSTANCE_TAGS"] = " ".join(tags)
1028 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1029 for key, value in source.items():
1030 env["INSTANCE_%s_%s" % (kind, key)] = value
1035 def _NICListToTuple(lu, nics):
1036 """Build a list of nic information tuples.
1038 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1039 value in LUInstanceQueryData.
1041 @type lu: L{LogicalUnit}
1042 @param lu: the logical unit on whose behalf we execute
1043 @type nics: list of L{objects.NIC}
1044 @param nics: list of nics to convert to hooks tuples
1048 cluster = lu.cfg.GetClusterInfo()
1052 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1053 mode = filled_params[constants.NIC_MODE]
1054 link = filled_params[constants.NIC_LINK]
1055 hooks_nics.append((ip, mac, mode, link))
1059 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1060 """Builds instance related env variables for hooks from an object.
1062 @type lu: L{LogicalUnit}
1063 @param lu: the logical unit on whose behalf we execute
1064 @type instance: L{objects.Instance}
1065 @param instance: the instance for which we should build the
1067 @type override: dict
1068 @param override: dictionary with key/values that will override
1071 @return: the hook environment dictionary
1074 cluster = lu.cfg.GetClusterInfo()
1075 bep = cluster.FillBE(instance)
1076 hvp = cluster.FillHV(instance)
1078 "name": instance.name,
1079 "primary_node": instance.primary_node,
1080 "secondary_nodes": instance.secondary_nodes,
1081 "os_type": instance.os,
1082 "status": instance.admin_up,
1083 "memory": bep[constants.BE_MEMORY],
1084 "vcpus": bep[constants.BE_VCPUS],
1085 "nics": _NICListToTuple(lu, instance.nics),
1086 "disk_template": instance.disk_template,
1087 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1090 "hypervisor_name": instance.hypervisor,
1091 "tags": instance.tags,
1094 args.update(override)
1095 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1098 def _AdjustCandidatePool(lu, exceptions):
1099 """Adjust the candidate pool after node operations.
1102 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1104 lu.LogInfo("Promoted nodes to master candidate role: %s",
1105 utils.CommaJoin(node.name for node in mod_list))
1106 for name in mod_list:
1107 lu.context.ReaddNode(name)
1108 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1110 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1114 def _DecideSelfPromotion(lu, exceptions=None):
1115 """Decide whether I should promote myself as a master candidate.
1118 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1119 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1120 # the new node will increase mc_max with one, so:
1121 mc_should = min(mc_should + 1, cp_size)
1122 return mc_now < mc_should
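# Example (sketch): with candidate_pool_size = 10, mc_now = 3 and
# GetMasterCandidateStats suggesting 3 candidates, mc_should becomes
# min(3 + 1, 10) = 4, so mc_now < mc_should and the node promotes itself.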
1125 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1126 """Check that the brigdes needed by a list of nics exist.
1129 cluster = lu.cfg.GetClusterInfo()
1130 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1131 brlist = [params[constants.NIC_LINK] for params in paramslist
1132 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1134 result = lu.rpc.call_bridges_exist(target_node, brlist)
1135 result.Raise("Error checking bridges on destination node '%s'" %
1136 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1139 def _CheckInstanceBridgesExist(lu, instance, node=None):
1140 """Check that the brigdes needed by an instance exist.
1144 node = instance.primary_node
1145 _CheckNicsBridgesExist(lu, instance.nics, node)
1148 def _CheckOSVariant(os_obj, name):
1149 """Check whether an OS name conforms to the os variants specification.
1151 @type os_obj: L{objects.OS}
1152 @param os_obj: OS object to check
1154 @param name: OS name passed by the user, to check for validity
1157 variant = objects.OS.GetVariant(name)
1158 if not os_obj.supported_variants:
1160 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1161 " passed)" % (os_obj.name, variant),
1165 raise errors.OpPrereqError("OS name must include a variant",
1168 if variant not in os_obj.supported_variants:
1169 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1172 def _GetNodeInstancesInner(cfg, fn):
1173 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1176 def _GetNodeInstances(cfg, node_name):
1177 """Returns a list of all primary and secondary instances on a node.
1181 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1184 def _GetNodePrimaryInstances(cfg, node_name):
1185 """Returns primary instances on a node.
1188 return _GetNodeInstancesInner(cfg,
1189 lambda inst: node_name == inst.primary_node)
1192 def _GetNodeSecondaryInstances(cfg, node_name):
1193 """Returns secondary instances on a node.
1196 return _GetNodeInstancesInner(cfg,
1197 lambda inst: node_name in inst.secondary_nodes)
1200 def _GetStorageTypeArgs(cfg, storage_type):
1201 """Returns the arguments for a storage type.
1204 # Special case for file storage
1205 if storage_type == constants.ST_FILE:
1206 # storage.FileStorage wants a list of storage directories
1207 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1215 for dev in instance.disks:
1216 cfg.SetDiskID(dev, node_name)
1218 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1219 result.Raise("Failed to get disk status from node %s" % node_name,
1220 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1222 for idx, bdev_status in enumerate(result.payload):
1223 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1229 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1230 """Check the sanity of iallocator and node arguments and use the
1231 cluster-wide iallocator if appropriate.
1233 Check that at most one of (iallocator, node) is specified. If none is
1234 specified, then the LU's opcode's iallocator slot is filled with the
1235 cluster-wide default iallocator.
1237 @type iallocator_slot: string
1238 @param iallocator_slot: the name of the opcode iallocator slot
1239 @type node_slot: string
1240 @param node_slot: the name of the opcode target node slot
1243 node = getattr(lu.op, node_slot, None)
1244 iallocator = getattr(lu.op, iallocator_slot, None)
1246 if node is not None and iallocator is not None:
1247 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1249 elif node is None and iallocator is None:
1250 default_iallocator = lu.cfg.GetDefaultIAllocator()
1251 if default_iallocator:
1252 setattr(lu.op, iallocator_slot, default_iallocator)
1254 raise errors.OpPrereqError("No iallocator or node given and no"
1255 " cluster-wide default iallocator found;"
1256 " please specify either an iallocator or a"
1257 " node, or set a cluster-wide default"
1261 def _GetDefaultIAllocator(cfg, iallocator):
1262 """Decides on which iallocator to use.
1264 @type cfg: L{config.ConfigWriter}
1265 @param cfg: Cluster configuration object
1266 @type iallocator: string or None
1267 @param iallocator: Iallocator specified in opcode
1269 @return: Iallocator name
1273 # Use default iallocator
1274 iallocator = cfg.GetDefaultIAllocator()
1277 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1278 " opcode nor as a cluster-wide default",
1284 class LUClusterPostInit(LogicalUnit):
1285 """Logical unit for running hooks after cluster initialization.
1288 HPATH = "cluster-init"
1289 HTYPE = constants.HTYPE_CLUSTER
1291 def BuildHooksEnv(self):
1296 "OP_TARGET": self.cfg.GetClusterName(),
1299 def BuildHooksNodes(self):
1300 """Build hooks nodes.
1303 return ([], [self.cfg.GetMasterNode()])
1305 def Exec(self, feedback_fn):
1312 class LUClusterDestroy(LogicalUnit):
1313 """Logical unit for destroying the cluster.
1316 HPATH = "cluster-destroy"
1317 HTYPE = constants.HTYPE_CLUSTER
1319 def BuildHooksEnv(self):
1324 "OP_TARGET": self.cfg.GetClusterName(),
1327 def BuildHooksNodes(self):
1328 """Build hooks nodes.
1333 def CheckPrereq(self):
1334 """Check prerequisites.
1336 This checks whether the cluster is empty.
1338 Any errors are signaled by raising errors.OpPrereqError.
1341 master = self.cfg.GetMasterNode()
1343 nodelist = self.cfg.GetNodeList()
1344 if len(nodelist) != 1 or nodelist[0] != master:
1345 raise errors.OpPrereqError("There are still %d node(s) in"
1346 " this cluster." % (len(nodelist) - 1),
1348 instancelist = self.cfg.GetInstanceList()
if instancelist:
  raise errors.OpPrereqError("There are still %d instance(s) in"
1351 " this cluster." % len(instancelist),
1354 def Exec(self, feedback_fn):
1355 """Destroys the cluster.
1358 (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
1360 # Run post hooks on master node before it's removed
1361 _RunPostHook(self, master)
1363 result = self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev,
1365 result.Raise("Could not disable the master role")
1370 def _VerifyCertificate(filename):
1371 """Verifies a certificate for L{LUClusterVerifyConfig}.
1373 @type filename: string
1374 @param filename: Path to PEM file
try:
  cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                         utils.ReadFile(filename))
1380 except Exception, err: # pylint: disable=W0703
1381 return (LUClusterVerifyConfig.ETYPE_ERROR,
1382 "Failed to load X509 certificate %s: %s" % (filename, err))
1385 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1386 constants.SSL_CERT_EXPIRATION_ERROR)
1389 fnamemsg = "While verifying %s: %s" % (filename, msg)
1394 return (None, fnamemsg)
1395 elif errcode == utils.CERT_WARNING:
1396 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1397 elif errcode == utils.CERT_ERROR:
1398 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1400 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1403 def _GetAllHypervisorParameters(cluster, instances):
1404 """Compute the set of all hypervisor parameters.
1406 @type cluster: L{objects.Cluster}
1407 @param cluster: the cluster object
1408 @param instances: list of L{objects.Instance}
1409 @param instances: additional instances from which to obtain parameters
1410 @rtype: list of (origin, hypervisor, parameters)
1411 @return: a list with all parameters found, indicating the hypervisor they
1412 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1417 for hv_name in cluster.enabled_hypervisors:
1418 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1420 for os_name, os_hvp in cluster.os_hvp.items():
1421 for hv_name, hv_params in os_hvp.items():
1423 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1424 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1426 # TODO: collapse identical parameter values in a single one
1427 for instance in instances:
1428 if instance.hvparams:
1429 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1430 cluster.FillHV(instance)))
1435 class _VerifyErrors(object):
1436 """Mix-in for cluster/group verify LUs.
1438 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1439 self.op and self._feedback_fn to be available.)
1443 ETYPE_FIELD = "code"
1444 ETYPE_ERROR = "ERROR"
1445 ETYPE_WARNING = "WARNING"
1447 def _Error(self, ecode, item, msg, *args, **kwargs):
1448 """Format an error message.
1450 Based on the opcode's error_codes parameter, either format a
1451 parseable error code, or a simpler error string.
1453 This must be called only from Exec and functions called from Exec.
1456 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1457 itype, etxt, _ = ecode
1458 # first complete the msg
1461 # then format the whole message
1462 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1463 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
else:
  msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1470 # and finally report it via the feedback_fn
1471 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1473 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1474 """Log an error message if the passed condition is True.
1478 or self.op.debug_simulate_errors) # pylint: disable=E1101
1480 # If the error code is in the list of ignored errors, demote the error to a
1482 (_, etxt, _) = ecode
1483 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1484 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1487 self._Error(ecode, *args, **kwargs)
1489 # do not mark the operation as failed for WARN cases only
1490 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1491 self.bad = self.bad or cond
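# Usage sketch (mirroring the verify code below): LUs alias the bound method
# and pass a condition, an error code, the affected item and a message:
#   _ErrorIf = self._ErrorIf
#   _ErrorIf(test, constants.CV_ENODERPC, node,
#            "unable to verify node: no data returned")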
1494 class LUClusterVerify(NoHooksLU):
1495 """Submits all jobs necessary to verify the cluster.
1500 def ExpandNames(self):
1501 self.needed_locks = {}
1503 def Exec(self, feedback_fn):
1506 if self.op.group_name:
1507 groups = [self.op.group_name]
1508 depends_fn = lambda: None
1510 groups = self.cfg.GetNodeGroupList()
1512 # Verify global configuration
1514 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1517 # Always depend on global verification
1518 depends_fn = lambda: [(-len(jobs), [])]
1520 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1521 ignore_errors=self.op.ignore_errors,
1522 depends=depends_fn())]
1523 for group in groups)
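# Note: the (-len(jobs), []) entry above is a relative job dependency; it
# makes each per-group verification job depend on the global configuration
# verification submitted earlier in this same job list.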
1525 # Fix up all parameters
1526 for op in itertools.chain(*jobs): # pylint: disable=W0142
1527 op.debug_simulate_errors = self.op.debug_simulate_errors
1528 op.verbose = self.op.verbose
1529 op.error_codes = self.op.error_codes
try:
  op.skip_checks = self.op.skip_checks
1532 except AttributeError:
1533 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1535 return ResultWithJobs(jobs)
1538 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1539 """Verifies the cluster config.
1544 def _VerifyHVP(self, hvp_data):
1545 """Verifies locally the syntax of the hypervisor parameters.
1548 for item, hv_name, hv_params in hvp_data:
1549 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1552 hv_class = hypervisor.GetHypervisor(hv_name)
1553 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1554 hv_class.CheckParameterSyntax(hv_params)
1555 except errors.GenericError, err:
1556 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1558 def ExpandNames(self):
1559 # Information can be safely retrieved as the BGL is acquired in exclusive
1561 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1562 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1563 self.all_node_info = self.cfg.GetAllNodesInfo()
1564 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1565 self.needed_locks = {}
1567 def Exec(self, feedback_fn):
1568 """Verify integrity of cluster, performing various test on nodes.
1572 self._feedback_fn = feedback_fn
1574 feedback_fn("* Verifying cluster config")
1576 for msg in self.cfg.VerifyConfig():
1577 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1579 feedback_fn("* Verifying cluster certificate files")
1581 for cert_filename in constants.ALL_CERT_FILES:
1582 (errcode, msg) = _VerifyCertificate(cert_filename)
1583 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1585 feedback_fn("* Verifying hypervisor parameters")
1587 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1588 self.all_inst_info.values()))
1590 feedback_fn("* Verifying all nodes belong to an existing group")
1592 # We do this verification here because, should this bogus circumstance
1593 # occur, it would never be caught by VerifyGroup, which only acts on
1594 # nodes/instances reachable from existing node groups.
1596 dangling_nodes = set(node.name for node in self.all_node_info.values()
1597 if node.group not in self.all_group_info)
1599 dangling_instances = {}
1600 no_node_instances = []
1602 for inst in self.all_inst_info.values():
1603 if inst.primary_node in dangling_nodes:
1604 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1605 elif inst.primary_node not in self.all_node_info:
1606 no_node_instances.append(inst.name)
1611 utils.CommaJoin(dangling_instances.get(node.name,
1613 for node in dangling_nodes]
1615 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1617 "the following nodes (and their instances) belong to a non"
1618 " existing group: %s", utils.CommaJoin(pretty_dangling))
1620 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1622 "the following instances have a non-existing primary-node:"
1623 " %s", utils.CommaJoin(no_node_instances))
1628 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1629 """Verifies the status of a node group.
1632 HPATH = "cluster-verify"
1633 HTYPE = constants.HTYPE_CLUSTER
1636 _HOOKS_INDENT_RE = re.compile("^", re.M)
1638 class NodeImage(object):
1639 """A class representing the logical and physical status of a node.
1642 @ivar name: the node name to which this object refers
1643 @ivar volumes: a structure as returned from
1644 L{ganeti.backend.GetVolumeList} (runtime)
1645 @ivar instances: a list of running instances (runtime)
1646 @ivar pinst: list of configured primary instances (config)
1647 @ivar sinst: list of configured secondary instances (config)
1648 @ivar sbp: dictionary of {primary-node: list of instances} for all
1649 instances for which this node is secondary (config)
1650 @ivar mfree: free memory, as reported by hypervisor (runtime)
1651 @ivar dfree: free disk, as reported by the node (runtime)
1652 @ivar offline: the offline status (config)
1653 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call was successful (overall,
1655 not whether the individual keys were correct) (runtime)
1656 @type lvm_fail: boolean
1657 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1658 @type hyp_fail: boolean
1659 @ivar hyp_fail: whether the RPC call didn't return the instance list
1660 @type ghost: boolean
1661 @ivar ghost: whether this is a known node or not (config)
1662 @type os_fail: boolean
1663 @ivar os_fail: whether the RPC call didn't return valid OS data
1665 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1666 @type vm_capable: boolean
1667 @ivar vm_capable: whether the node can host instances
1670 def __init__(self, offline=False, name=None, vm_capable=True):
1679 self.offline = offline
1680 self.vm_capable = vm_capable
1681 self.rpc_fail = False
1682 self.lvm_fail = False
1683 self.hyp_fail = False
1685 self.os_fail = False
1688 def ExpandNames(self):
1689 # This raises errors.OpPrereqError on its own:
1690 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1692 # Get instances in node group; this is unsafe and needs verification later
1693 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1695 self.needed_locks = {
1696 locking.LEVEL_INSTANCE: inst_names,
1697 locking.LEVEL_NODEGROUP: [self.group_uuid],
1698 locking.LEVEL_NODE: [],
1701 self.share_locks = _ShareAll()
1703 def DeclareLocks(self, level):
1704 if level == locking.LEVEL_NODE:
1705 # Get members of node group; this is unsafe and needs verification later
1706 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1708 all_inst_info = self.cfg.GetAllInstancesInfo()
1710 # In Exec(), we warn about mirrored instances that have primary and
1711 # secondary living in separate node groups. To fully verify that
1712 # volumes for these instances are healthy, we will need to do an
1713 # extra call to their secondaries. We ensure here those nodes will
1715 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1716 # Important: access only the instances whose lock is owned
1717 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1718 nodes.update(all_inst_info[inst].secondary_nodes)
1720 self.needed_locks[locking.LEVEL_NODE] = nodes
1722 def CheckPrereq(self):
1723 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1724 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1726 group_nodes = set(self.group_info.members)
1727 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1730 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1732 unlocked_instances = \
1733 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1736 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1737 utils.CommaJoin(unlocked_nodes))
1739 if unlocked_instances:
1740 raise errors.OpPrereqError("Missing lock for instances: %s" %
1741 utils.CommaJoin(unlocked_instances))
1743 self.all_node_info = self.cfg.GetAllNodesInfo()
1744 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1746 self.my_node_names = utils.NiceSort(group_nodes)
1747 self.my_inst_names = utils.NiceSort(group_instances)
1749 self.my_node_info = dict((name, self.all_node_info[name])
1750 for name in self.my_node_names)
1752 self.my_inst_info = dict((name, self.all_inst_info[name])
1753 for name in self.my_inst_names)
1755 # We detect here the nodes that will need the extra RPC calls for verifying
1756 # split LV volumes; they should be locked.
1757 extra_lv_nodes = set()
1759 for inst in self.my_inst_info.values():
1760 if inst.disk_template in constants.DTS_INT_MIRROR:
1761 group = self.my_node_info[inst.primary_node].group
1762 for nname in inst.secondary_nodes:
1763 if self.all_node_info[nname].group != group:
1764 extra_lv_nodes.add(nname)
1766 unlocked_lv_nodes = \
1767 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1769 if unlocked_lv_nodes:
1770 raise errors.OpPrereqError("these nodes could be locked: %s" %
1771 utils.CommaJoin(unlocked_lv_nodes))
1772 self.extra_lv_nodes = list(extra_lv_nodes)
1774 def _VerifyNode(self, ninfo, nresult):
1775 """Perform some basic validation on data returned from a node.
1777 - check the result data structure is well formed and has all the
1779 - check ganeti version
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the results from the node
1785 @return: whether overall this call was successful (and we can expect
reasonable values in the response)
1790 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1792 # main result, nresult should be a non-empty dict
1793 test = not nresult or not isinstance(nresult, dict)
1794 _ErrorIf(test, constants.CV_ENODERPC, node,
1795 "unable to verify node: no data returned")
1799 # compares ganeti version
1800 local_version = constants.PROTOCOL_VERSION
1801 remote_version = nresult.get("version", None)
1802 test = not (remote_version and
1803 isinstance(remote_version, (list, tuple)) and
1804 len(remote_version) == 2)
1805 _ErrorIf(test, constants.CV_ENODERPC, node,
1806 "connection to node returned invalid data")
1810 test = local_version != remote_version[0]
1811 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1812 "incompatible protocol versions: master %s,"
1813 " node %s", local_version, remote_version[0])
1817 # node seems compatible, we can actually try to look into its results
1819 # full package version
1820 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1821 constants.CV_ENODEVERSION, node,
1822 "software version mismatch: master %s, node %s",
1823 constants.RELEASE_VERSION, remote_version[1],
1824 code=self.ETYPE_WARNING)
1826 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1827 if ninfo.vm_capable and isinstance(hyp_result, dict):
1828 for hv_name, hv_result in hyp_result.iteritems():
1829 test = hv_result is not None
1830 _ErrorIf(test, constants.CV_ENODEHV, node,
1831 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1833 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1834 if ninfo.vm_capable and isinstance(hvp_result, list):
1835 for item, hv_name, hv_result in hvp_result:
1836 _ErrorIf(True, constants.CV_ENODEHV, node,
1837 "hypervisor %s parameter verify failure (source %s): %s",
1838 hv_name, item, hv_result)
1840 test = nresult.get(constants.NV_NODESETUP,
1841 ["Missing NODESETUP results"])
1842 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1847 def _VerifyNodeTime(self, ninfo, nresult,
1848 nvinfo_starttime, nvinfo_endtime):
1849 """Check the node time.
1851 @type ninfo: L{objects.Node}
1852 @param ninfo: the node to check
1853 @param nresult: the remote results for the node
1854 @param nvinfo_starttime: the start time of the RPC call
1855 @param nvinfo_endtime: the end time of the RPC call
1859 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1861 ntime = nresult.get(constants.NV_TIME, None)
1863 ntime_merged = utils.MergeTime(ntime)
1864 except (ValueError, TypeError):
1865 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1868 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1869 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1870 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
else:
  ntime_diff = None
1875 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1876 "Node time diverges by at least %s from master node time",
1879 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1880 """Check the node LVM results.
1882 @type ninfo: L{objects.Node}
1883 @param ninfo: the node to check
1884 @param nresult: the remote results for the node
1885 @param vg_name: the configured VG name
1892 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1894 # checks vg existence and size > 20G
1895 vglist = nresult.get(constants.NV_VGLIST, None)
test = vglist is None
_ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1899 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1900 constants.MIN_VG_SIZE)
1901 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1904 pvlist = nresult.get(constants.NV_PVLIST, None)
1905 test = pvlist is None
1906 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1908 # check that ':' is not present in PV names, since it's a
1909 # special character for lvcreate (denotes the range of PEs to
1911 for _, pvname, owner_vg in pvlist:
1912 test = ":" in pvname
1913 _ErrorIf(test, constants.CV_ENODELVM, node,
1914 "Invalid character ':' in PV '%s' of VG '%s'",
1917 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1918 """Check the node bridges.
1920 @type ninfo: L{objects.Node}
1921 @param ninfo: the node to check
1922 @param nresult: the remote results for the node
1923 @param bridges: the expected list of bridges
1930 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1932 missing = nresult.get(constants.NV_BRIDGES, None)
1933 test = not isinstance(missing, list)
1934 _ErrorIf(test, constants.CV_ENODENET, node,
1935 "did not return valid bridge information")
1937 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1938 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1940 def _VerifyNodeNetwork(self, ninfo, nresult):
1941 """Check the node network connectivity results.
1943 @type ninfo: L{objects.Node}
1944 @param ninfo: the node to check
1945 @param nresult: the remote results for the node
1949 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1951 test = constants.NV_NODELIST not in nresult
1952 _ErrorIf(test, constants.CV_ENODESSH, node,
1953 "node hasn't returned node ssh connectivity data")
1955 if nresult[constants.NV_NODELIST]:
1956 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1957 _ErrorIf(True, constants.CV_ENODESSH, node,
1958 "ssh communication with node '%s': %s", a_node, a_msg)
1960 test = constants.NV_NODENETTEST not in nresult
1961 _ErrorIf(test, constants.CV_ENODENET, node,
1962 "node hasn't returned node tcp connectivity data")
1964 if nresult[constants.NV_NODENETTEST]:
1965 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1967 _ErrorIf(True, constants.CV_ENODENET, node,
1968 "tcp communication with node '%s': %s",
1969 anode, nresult[constants.NV_NODENETTEST][anode])
1971 test = constants.NV_MASTERIP not in nresult
1972 _ErrorIf(test, constants.CV_ENODENET, node,
1973 "node hasn't returned node master IP reachability data")
1975 if not nresult[constants.NV_MASTERIP]:
1976 if node == self.master_node:
1977 msg = "the master node cannot reach the master IP (not configured?)"
1979 msg = "cannot reach the master IP"
1980 _ErrorIf(True, constants.CV_ENODENET, node, msg)
1982 def _VerifyInstance(self, instance, instanceconfig, node_image,
1984 """Verify an instance.
1986 This function checks to see if the required block devices are
1987 available on the instance's node.
1990 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1991 node_current = instanceconfig.primary_node
1993 node_vol_should = {}
1994 instanceconfig.MapLVsByNode(node_vol_should)
1996 for node in node_vol_should:
1997 n_img = node_image[node]
1998 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1999 # ignore missing volumes on offline or broken nodes
2001 for volume in node_vol_should[node]:
2002 test = volume not in n_img.volumes
2003 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2004 "volume %s missing on node %s", volume, node)
2006 if instanceconfig.admin_up:
2007 pri_img = node_image[node_current]
2008 test = instance not in pri_img.instances and not pri_img.offline
2009 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2010 "instance not running on its primary node %s",
2013 diskdata = [(nname, success, status, idx)
2014 for (nname, disks) in diskstatus.items()
2015 for idx, (success, status) in enumerate(disks)]
2017 for nname, success, bdev_status, idx in diskdata:
2018 # the 'ghost node' construction in Exec() ensures that we have a
2020 snode = node_image[nname]
2021 bad_snode = snode.ghost or snode.offline
2022 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2023 constants.CV_EINSTANCEFAULTYDISK, instance,
2024 "couldn't retrieve status for disk/%s on %s: %s",
2025 idx, nname, bdev_status)
2026 _ErrorIf((instanceconfig.admin_up and success and
2027 bdev_status.ldisk_status == constants.LDS_FAULTY),
2028 constants.CV_EINSTANCEFAULTYDISK, instance,
2029 "disk/%s on %s is faulty", idx, nname)
2031 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2032 """Verify if there are any unknown volumes in the cluster.
2034 The .os, .swap and backup volumes are ignored. All other volumes are
2035 reported as unknown.
2037 @type reserved: L{ganeti.utils.FieldSet}
2038 @param reserved: a FieldSet of reserved volume names
2041 for node, n_img in node_image.items():
2042 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2043 # skip non-healthy nodes
2045 for volume in n_img.volumes:
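# A volume is considered orphaned when no instance expects it on this node
# and its name does not match any reserved-LV pattern.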
2046 test = ((node not in node_vol_should or
2047 volume not in node_vol_should[node]) and
2048 not reserved.Matches(volume))
2049 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2050 "volume %s is unknown", volume)
2052 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2053 """Verify N+1 Memory Resilience.
2055 Check that if one single node dies we can still start all the
2056 instances it was primary for.
2059 cluster_info = self.cfg.GetClusterInfo()
2060 for node, n_img in node_image.items():
2061 # This code checks that every node which is now listed as
2062 # secondary has enough memory to host all instances it is
2063 # supposed to should a single other node in the cluster fail.
2064 # FIXME: not ready for failover to an arbitrary node
2065 # FIXME: does not support file-backed instances
2066 # WARNING: we currently take into account down instances as well
2067 # as up ones, considering that even if they're down someone
2068 # might want to start them even in the event of a node failure.
2070 # we're skipping offline nodes from the N+1 warning, since
2071 # most likely we don't have good memory information from them;
2072 # we already list instances living on such nodes, and that's
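# n_img.sbp maps a primary node to the instances that have it as primary and
# this node as secondary; this node must have enough free memory to absorb a
# failover of each such primary's auto-balanced instances separately.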
2075 for prinode, instances in n_img.sbp.items():
2077 for instance in instances:
2078 bep = cluster_info.FillBE(instance_cfg[instance])
2079 if bep[constants.BE_AUTO_BALANCE]:
2080 needed_mem += bep[constants.BE_MEMORY]
2081 test = n_img.mfree < needed_mem
2082 self._ErrorIf(test, constants.CV_ENODEN1, node,
2083 "not enough memory to accomodate instance failovers"
2084 " should node %s fail (%dMiB needed, %dMiB available)",
2085 prinode, needed_mem, n_img.mfree)
2088 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2089 (files_all, files_opt, files_mc, files_vm)):
2090 """Verifies file checksums collected from all nodes.
2092 @param errorif: Callback for reporting errors
2093 @param nodeinfo: List of L{objects.Node} objects
2094 @param master_node: Name of master node
2095 @param all_nvinfo: RPC results
2098 # Define functions determining which nodes to consider for a file
2101 (files_mc, lambda node: (node.master_candidate or
2102 node.name == master_node)),
2103 (files_vm, lambda node: node.vm_capable),
2106 # Build mapping from filename to list of nodes which should have the file
2108 for (files, fn) in files2nodefn:
2110 filenodes = nodeinfo
2112 filenodes = filter(fn, nodeinfo)
2113 nodefiles.update((filename,
2114 frozenset(map(operator.attrgetter("name"), filenodes)))
2115 for filename in files)
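# nodefiles now maps each expected filename to the frozenset of node names
# that should have it.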
2117 assert set(nodefiles) == (files_all | files_mc | files_vm)
2119 fileinfo = dict((filename, {}) for filename in nodefiles)
2120 ignore_nodes = set()
2122 for node in nodeinfo:
2124 ignore_nodes.add(node.name)
2127 nresult = all_nvinfo[node.name]
2129 if nresult.fail_msg or not nresult.payload:
2132 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2134 test = not (node_files and isinstance(node_files, dict))
2135 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2136 "Node did not return file checksum data")
2138 ignore_nodes.add(node.name)
2141 # Build per-checksum mapping from filename to nodes having it
2142 for (filename, checksum) in node_files.items():
2143 assert filename in nodefiles
2144 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2146 for (filename, checksums) in fileinfo.items():
2147 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2149 # Nodes having the file
2150 with_file = frozenset(node_name
2151 for nodes in fileinfo[filename].values()
2152 for node_name in nodes) - ignore_nodes
2154 expected_nodes = nodefiles[filename] - ignore_nodes
2156 # Nodes missing file
2157 missing_file = expected_nodes - with_file
2159 if filename in files_opt:
2161 errorif(missing_file and missing_file != expected_nodes,
2162 constants.CV_ECLUSTERFILECHECK, None,
2163 "File %s is optional, but it must exist on all or no"
2164 " nodes (not found on %s)",
2165 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2167 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2168 "File %s is missing from node(s) %s", filename,
2169 utils.CommaJoin(utils.NiceSort(missing_file)))
2171 # Warn if a node has a file it shouldn't
2172 unexpected = with_file - expected_nodes
2174 constants.CV_ECLUSTERFILECHECK, None,
2175 "File %s should not exist on node(s) %s",
2176 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2178 # See if there are multiple versions of the file
2179 test = len(checksums) > 1
2181 variants = ["variant %s on %s" %
2182 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2183 for (idx, (checksum, nodes)) in
2184 enumerate(sorted(checksums.items()))]
2188 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2189 "File %s found with %s different checksums (%s)",
2190 filename, len(checksums), "; ".join(variants))
2192 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2194 """Verifies and the node DRBD status.
2196 @type ninfo: L{objects.Node}
2197 @param ninfo: the node to check
2198 @param nresult: the remote results for the node
2199 @param instanceinfo: the dict of instances
2200 @param drbd_helper: the configured DRBD usermode helper
2201 @param drbd_map: the DRBD map as returned by
2202 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2206 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2209 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2210 test = (helper_result is None)
2211 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2212 "no drbd usermode helper returned")
2214 status, payload = helper_result
2216 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2217 "drbd usermode helper check unsuccessful: %s", payload)
2218 test = status and (payload != drbd_helper)
2219 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2220 "wrong drbd usermode helper: %s", payload)
2222 # compute the DRBD minors
2224 for minor, instance in drbd_map[node].items():
2225 test = instance not in instanceinfo
2226 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2227 "ghost instance '%s' in temporary DRBD map", instance)
2228 # ghost instance should not be running, but otherwise we
2229 # don't give double warnings (both ghost instance and
2230 # unallocated minor in use)
2232 node_drbd[minor] = (instance, False)
2234 instance = instanceinfo[instance]
2235 node_drbd[minor] = (instance.name, instance.admin_up)
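# node_drbd now maps each expected DRBD minor to (instance name, must_exist);
# must_exist is False for ghost instances so they are not reported twice.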
2237 # and now check them
2238 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2239 test = not isinstance(used_minors, (tuple, list))
2240 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2241 "cannot parse drbd status file: %s", str(used_minors))
2243 # we cannot check drbd status
2246 for minor, (iname, must_exist) in node_drbd.items():
2247 test = minor not in used_minors and must_exist
2248 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2249 "drbd minor %d of instance %s is not active", minor, iname)
2250 for minor in used_minors:
2251 test = minor not in node_drbd
2252 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2253 "unallocated drbd minor %d is in use", minor)
2255 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2256 """Builds the node OS structures.
2258 @type ninfo: L{objects.Node}
2259 @param ninfo: the node to check
2260 @param nresult: the remote results for the node
2261 @param nimg: the node image object
2265 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2267 remote_os = nresult.get(constants.NV_OSLIST, None)
2268 test = (not isinstance(remote_os, list) or
2269 not compat.all(isinstance(v, list) and len(v) == 7
2270 for v in remote_os))
2272 _ErrorIf(test, constants.CV_ENODEOS, node,
2273 "node hasn't returned valid OS data")
2282 for (name, os_path, status, diagnose,
2283 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2285 if name not in os_dict:
2288 # parameters is a list of lists instead of list of tuples due to
2289 # JSON lacking a real tuple type, fix it:
2290 parameters = [tuple(v) for v in parameters]
2291 os_dict[name].append((os_path, status, diagnose,
2292 set(variants), set(parameters), set(api_ver)))
2294 nimg.oslist = os_dict
2296 def _VerifyNodeOS(self, ninfo, nimg, base):
2297 """Verifies the node OS list.
2299 @type ninfo: L{objects.Node}
2300 @param ninfo: the node to check
2301 @param nimg: the node image object
2302 @param base: the 'template' node we match against (e.g. from the master)
2306 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2308 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2310 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2311 for os_name, os_data in nimg.oslist.items():
2312 assert os_data, "Empty OS status for OS %s?!" % os_name
2313 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2314 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2315 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2316 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2317 "OS '%s' has multiple entries (first one shadows the rest): %s",
2318 os_name, utils.CommaJoin([v[0] for v in os_data]))
2319 # comparisons with the 'base' image
2320 test = os_name not in base.oslist
2321 _ErrorIf(test, constants.CV_ENODEOS, node,
2322 "Extra OS %s not present on reference node (%s)",
2326 assert base.oslist[os_name], "Base node has empty OS status?"
2327 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2329 # base OS is invalid, skipping
2331 for kind, a, b in [("API version", f_api, b_api),
2332 ("variants list", f_var, b_var),
2333 ("parameters", beautify_params(f_param),
2334 beautify_params(b_param))]:
2335 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2336 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2337 kind, os_name, base.name,
2338 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2340 # check any missing OSes
2341 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2342 _ErrorIf(missing, constants.CV_ENODEOS, node,
2343 "OSes present on reference node %s but missing on this node: %s",
2344 base.name, utils.CommaJoin(missing))
2346 def _VerifyOob(self, ninfo, nresult):
2347 """Verifies out of band functionality of a node.
2349 @type ninfo: L{objects.Node}
2350 @param ninfo: the node to check
2351 @param nresult: the remote results for the node
2355 # We just have to verify the paths on master and/or master candidates
2356 # as the oob helper is invoked on the master
2357 if ((ninfo.master_candidate or ninfo.master_capable) and
2358 constants.NV_OOB_PATHS in nresult):
2359 for path_result in nresult[constants.NV_OOB_PATHS]:
2360 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2362 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2363 """Verifies and updates the node volume data.
2365 This function will update a L{NodeImage}'s internal structures
2366 with data from the remote call.
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2371 @param nimg: the node image object
2372 @param vg_name: the configured VG name
2376 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2378 nimg.lvm_fail = True
2379 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2382 elif isinstance(lvdata, basestring):
2383 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2384 utils.SafeEncode(lvdata))
2385 elif not isinstance(lvdata, dict):
2386 _ErrorIf(True, constants.CV_ENODELVM, node,
2387 "rpc call to node failed (lvlist)")
2389 nimg.volumes = lvdata
2390 nimg.lvm_fail = False
2392 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2393 """Verifies and updates the node instance list.
2395 If the listing was successful, then updates this node's instance
2396 list. Otherwise, it marks the RPC call as failed for the instance list.
2399 @type ninfo: L{objects.Node}
2400 @param ninfo: the node to check
2401 @param nresult: the remote results for the node
2402 @param nimg: the node image object
2405 idata = nresult.get(constants.NV_INSTANCELIST, None)
2406 test = not isinstance(idata, list)
2407 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2408 "rpc call to node failed (instancelist): %s",
2409 utils.SafeEncode(str(idata)))
2411 nimg.hyp_fail = True
2413 nimg.instances = idata
2415 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2416 """Verifies and computes a node information map
2418 @type ninfo: L{objects.Node}
2419 @param ninfo: the node to check
2420 @param nresult: the remote results for the node
2421 @param nimg: the node image object
2422 @param vg_name: the configured VG name
2426 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2428 # try to read free memory (from the hypervisor)
2429 hv_info = nresult.get(constants.NV_HVINFO, None)
2430 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2431 _ErrorIf(test, constants.CV_ENODEHV, node,
2432 "rpc call to node failed (hvinfo)")
2435 nimg.mfree = int(hv_info["memory_free"])
2436 except (ValueError, TypeError):
2437 _ErrorIf(True, constants.CV_ENODERPC, node,
2438 "node returned invalid nodeinfo, check hypervisor")
2440 # FIXME: devise a free space model for file based instances as well
2441 if vg_name is not None:
2442 test = (constants.NV_VGLIST not in nresult or
2443 vg_name not in nresult[constants.NV_VGLIST])
2444 _ErrorIf(test, constants.CV_ENODELVM, node,
2445 "node didn't return data for the volume group '%s'"
2446 " - it is either missing or broken", vg_name)
2449 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2450 except (ValueError, TypeError):
2451 _ErrorIf(True, constants.CV_ENODERPC, node,
2452 "node returned invalid LVM info, check LVM status")
2454 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2455 """Gets per-disk status information for all instances.
2457 @type nodelist: list of strings
2458 @param nodelist: Node names
2459 @type node_image: dict of (name, L{objects.Node})
2460 @param node_image: Node objects
2461 @type instanceinfo: dict of (name, L{objects.Instance})
2462 @param instanceinfo: Instance objects
2463 @rtype: {instance: {node: [(success, payload)]}}
2464 @return: a dictionary of per-instance dictionaries with nodes as
2465 keys and disk information as values; the disk information is a
2466 list of tuples (success, payload)
2469 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2472 node_disks_devonly = {}
2473 diskless_instances = set()
2474 diskless = constants.DT_DISKLESS
2476 for nname in nodelist:
2477 node_instances = list(itertools.chain(node_image[nname].pinst,
2478 node_image[nname].sinst))
2479 diskless_instances.update(inst for inst in node_instances
2480 if instanceinfo[inst].disk_template == diskless)
2481 disks = [(inst, disk)
2482 for inst in node_instances
2483 for disk in instanceinfo[inst].disks]
2486 # No need to collect data
2489 node_disks[nname] = disks
2491 # Creating copies as SetDiskID below will modify the objects and that can
2492 # lead to incorrect data returned from nodes
2493 devonly = [dev.Copy() for (_, dev) in disks]
2496 self.cfg.SetDiskID(dev, nname)
2498 node_disks_devonly[nname] = devonly
2500 assert len(node_disks) == len(node_disks_devonly)
2502 # Collect data from all nodes with disks
2503 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2506 assert len(result) == len(node_disks)
2510 for (nname, nres) in result.items():
2511 disks = node_disks[nname]
2514 # No data from this node
2515 data = len(disks) * [(False, "node offline")]
2518 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2519 "while getting disk information: %s", msg)
2521 # No data from this node
2522 data = len(disks) * [(False, msg)]
2525 for idx, i in enumerate(nres.payload):
2526 if isinstance(i, (tuple, list)) and len(i) == 2:
2529 logging.warning("Invalid result from node %s, entry %d: %s",
2531 data.append((False, "Invalid result from the remote node"))
2533 for ((inst, _), status) in zip(disks, data):
2534 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
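# instdisk now maps instance name -> node name -> list of (success, payload)
# tuples, one entry per disk index.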
2536 # Add empty entries for diskless instances.
2537 for inst in diskless_instances:
2538 assert inst not in instdisk
2541 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2542 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2543 compat.all(isinstance(s, (tuple, list)) and
2544 len(s) == 2 for s in statuses)
2545 for inst, nnames in instdisk.items()
2546 for nname, statuses in nnames.items())
2547 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2552 def _SshNodeSelector(group_uuid, all_nodes):
2553 """Create endless iterators for all potential SSH check hosts.
2556 nodes = [node for node in all_nodes
2557 if (node.group != group_uuid and
2559 keyfunc = operator.attrgetter("group")
2561 return map(itertools.cycle,
2562 [sorted(map(operator.attrgetter("name"), names))
2563 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2567 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2568 """Choose which nodes should talk to which other nodes.
2570 We will make nodes contact all nodes in their group, and one node from every other group.
2573 @warning: This algorithm has a known issue if one node group is much
2574 smaller than others (e.g. just one node). In such a case all other
2575 nodes will talk to the single node.
2578 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2579 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2581 return (online_nodes,
2582 dict((name, sorted([i.next() for i in sel]))
2583 for name in online_nodes))
2585 def BuildHooksEnv(self):
2588 Cluster-Verify hooks are run only in the post phase; if they fail, their
2589 output is logged in the verify output and the verification fails.
2593 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2596 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2597 for node in self.my_node_info.values())
2601 def BuildHooksNodes(self):
2602 """Build hooks nodes.
2605 return ([], self.my_node_names)
2607 def Exec(self, feedback_fn):
2608 """Verify integrity of the node group, performing various test on nodes.
2611 # This method has too many local variables. pylint: disable=R0914
2612 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2614 if not self.my_node_names:
2616 feedback_fn("* Empty node group, skipping verification")
2620 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2621 verbose = self.op.verbose
2622 self._feedback_fn = feedback_fn
2624 vg_name = self.cfg.GetVGName()
2625 drbd_helper = self.cfg.GetDRBDHelper()
2626 cluster = self.cfg.GetClusterInfo()
2627 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2628 hypervisors = cluster.enabled_hypervisors
2629 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2631 i_non_redundant = [] # Non redundant instances
2632 i_non_a_balanced = [] # Non auto-balanced instances
2633 n_offline = 0 # Count of offline nodes
2634 n_drained = 0 # Count of nodes being drained
2635 node_vol_should = {}
2637 # FIXME: verify OS list
2640 filemap = _ComputeAncillaryFiles(cluster, False)
2642 # do local checksums
2643 master_node = self.master_node = self.cfg.GetMasterNode()
2644 master_ip = self.cfg.GetMasterIP()
2646 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2648 node_verify_param = {
2649 constants.NV_FILELIST:
2650 utils.UniqueSequence(filename
2651 for files in filemap
2652 for filename in files),
2653 constants.NV_NODELIST:
2654 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2655 self.all_node_info.values()),
2656 constants.NV_HYPERVISOR: hypervisors,
2657 constants.NV_HVPARAMS:
2658 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2659 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2660 for node in node_data_list
2661 if not node.offline],
2662 constants.NV_INSTANCELIST: hypervisors,
2663 constants.NV_VERSION: None,
2664 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2665 constants.NV_NODESETUP: None,
2666 constants.NV_TIME: None,
2667 constants.NV_MASTERIP: (master_node, master_ip),
2668 constants.NV_OSLIST: None,
2669 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
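# LVM- and DRBD-related data is only requested when the cluster is actually
# configured to use them (see the conditionals below).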
2672 if vg_name is not None:
2673 node_verify_param[constants.NV_VGLIST] = None
2674 node_verify_param[constants.NV_LVLIST] = vg_name
2675 node_verify_param[constants.NV_PVLIST] = [vg_name]
2676 node_verify_param[constants.NV_DRBDLIST] = None
2679 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
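# Collect every bridge referenced by the default NIC parameters or by any
# instance NIC, so each node can be asked to verify their presence
# (NV_BRIDGES below).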
2682 # FIXME: this needs to be changed per node-group, not cluster-wide
2684 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2685 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2686 bridges.add(default_nicpp[constants.NIC_LINK])
2687 for instance in self.my_inst_info.values():
2688 for nic in instance.nics:
2689 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2690 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2691 bridges.add(full_nic[constants.NIC_LINK])
2694 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2696 # Build our expected cluster state
2697 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2699 vm_capable=node.vm_capable))
2700 for node in node_data_list)
2704 for node in self.all_node_info.values():
2705 path = _SupportsOob(self.cfg, node)
2706 if path and path not in oob_paths:
2707 oob_paths.append(path)
2710 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2712 for instance in self.my_inst_names:
2713 inst_config = self.my_inst_info[instance]
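# Nodes used by an instance but outside the current group get a placeholder
# NodeImage; nodes not known to the cluster at all are additionally marked as
# ghosts so later checks can flag them.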
2715 for nname in inst_config.all_nodes:
2716 if nname not in node_image:
2717 gnode = self.NodeImage(name=nname)
2718 gnode.ghost = (nname not in self.all_node_info)
2719 node_image[nname] = gnode
2721 inst_config.MapLVsByNode(node_vol_should)
2723 pnode = inst_config.primary_node
2724 node_image[pnode].pinst.append(instance)
2726 for snode in inst_config.secondary_nodes:
2727 nimg = node_image[snode]
2728 nimg.sinst.append(instance)
2729 if pnode not in nimg.sbp:
2730 nimg.sbp[pnode] = []
2731 nimg.sbp[pnode].append(instance)
2733 # At this point, we have the in-memory data structures complete,
2734 # except for the runtime information, which we'll gather next
2736 # Due to the way our RPC system works, exact response times cannot be
2737 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2738 # time before and after executing the request, we can at least have a time
2740 nvinfo_starttime = time.time()
2741 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2743 self.cfg.GetClusterName())
2744 nvinfo_endtime = time.time()
2746 if self.extra_lv_nodes and vg_name is not None:
2748 self.rpc.call_node_verify(self.extra_lv_nodes,
2749 {constants.NV_LVLIST: vg_name},
2750 self.cfg.GetClusterName())
2752 extra_lv_nvinfo = {}
2754 all_drbd_map = self.cfg.ComputeDRBDMap()
2756 feedback_fn("* Gathering disk information (%s nodes)" %
2757 len(self.my_node_names))
2758 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2761 feedback_fn("* Verifying configuration file consistency")
2763 # If not all nodes are being checked, we need to make sure the master node
2764 # and a non-checked vm_capable node are in the list.
2765 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2767 vf_nvinfo = all_nvinfo.copy()
2768 vf_node_info = list(self.my_node_info.values())
2769 additional_nodes = []
2770 if master_node not in self.my_node_info:
2771 additional_nodes.append(master_node)
2772 vf_node_info.append(self.all_node_info[master_node])
2773 # Add the first vm_capable node we find which is not included
2774 for node in absent_nodes:
2775 nodeinfo = self.all_node_info[node]
2776 if nodeinfo.vm_capable and not nodeinfo.offline:
2777 additional_nodes.append(node)
2778 vf_node_info.append(self.all_node_info[node])
2780 key = constants.NV_FILELIST
2781 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2782 {key: node_verify_param[key]},
2783 self.cfg.GetClusterName()))
2785 vf_nvinfo = all_nvinfo
2786 vf_node_info = self.my_node_info.values()
2788 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2790 feedback_fn("* Verifying node status")
2794 for node_i in node_data_list:
2796 nimg = node_image[node]
2800 feedback_fn("* Skipping offline node %s" % (node,))
2804 if node == master_node:
2806 elif node_i.master_candidate:
2807 ntype = "master candidate"
2808 elif node_i.drained:
2814 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2816 msg = all_nvinfo[node].fail_msg
2817 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2820 nimg.rpc_fail = True
2823 nresult = all_nvinfo[node].payload
2825 nimg.call_ok = self._VerifyNode(node_i, nresult)
2826 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2827 self._VerifyNodeNetwork(node_i, nresult)
2828 self._VerifyOob(node_i, nresult)
2831 self._VerifyNodeLVM(node_i, nresult, vg_name)
2832 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2835 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2836 self._UpdateNodeInstances(node_i, nresult, nimg)
2837 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2838 self._UpdateNodeOS(node_i, nresult, nimg)
2840 if not nimg.os_fail:
2841 if refos_img is None:
2843 self._VerifyNodeOS(node_i, nimg, refos_img)
2844 self._VerifyNodeBridges(node_i, nresult, bridges)
2846 # Check whether all running instances are primary for the node. (This
2847 # can no longer be done from _VerifyInstance below, since some of the
2848 # wrong instances could be from other node groups.)
2849 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2851 for inst in non_primary_inst:
2852 test = inst in self.all_inst_info
2853 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2854 "instance should not run on node %s", node_i.name)
2855 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2856 "node is running unknown instance %s", inst)
2858 for node, result in extra_lv_nvinfo.items():
2859 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2860 node_image[node], vg_name)
2862 feedback_fn("* Verifying instance status")
2863 for instance in self.my_inst_names:
2865 feedback_fn("* Verifying instance %s" % instance)
2866 inst_config = self.my_inst_info[instance]
2867 self._VerifyInstance(instance, inst_config, node_image,
2869 inst_nodes_offline = []
2871 pnode = inst_config.primary_node
2872 pnode_img = node_image[pnode]
2873 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2874 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2875 " primary node failed", instance)
2877 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2878 constants.CV_EINSTANCEBADNODE, instance,
2879 "instance is marked as running and lives on offline node %s",
2880 inst_config.primary_node)
2882 # If the instance is non-redundant we cannot survive losing its primary
2883 # node, so we are not N+1 compliant. On the other hand we have no disk
2884 # templates with more than one secondary so that situation is not well
2886 # FIXME: does not support file-backed instances
2887 if not inst_config.secondary_nodes:
2888 i_non_redundant.append(instance)
2890 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2891 constants.CV_EINSTANCELAYOUT,
2892 instance, "instance has multiple secondary nodes: %s",
2893 utils.CommaJoin(inst_config.secondary_nodes),
2894 code=self.ETYPE_WARNING)
2896 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2897 pnode = inst_config.primary_node
2898 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2899 instance_groups = {}
2901 for node in instance_nodes:
2902 instance_groups.setdefault(self.all_node_info[node].group,
2906 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2907 # Sort so that we always list the primary node first.
2908 for group, nodes in sorted(instance_groups.items(),
2909 key=lambda (_, nodes): pnode in nodes,
2912 self._ErrorIf(len(instance_groups) > 1,
2913 constants.CV_EINSTANCESPLITGROUPS,
2914 instance, "instance has primary and secondary nodes in"
2915 " different groups: %s", utils.CommaJoin(pretty_list),
2916 code=self.ETYPE_WARNING)
2918 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2919 i_non_a_balanced.append(instance)
2921 for snode in inst_config.secondary_nodes:
2922 s_img = node_image[snode]
2923 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2924 snode, "instance %s, connection to secondary node failed",
2928 inst_nodes_offline.append(snode)
2930 # warn that the instance lives on offline nodes
2931 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2932 "instance has offline secondary node(s) %s",
2933 utils.CommaJoin(inst_nodes_offline))
2934 # ... or ghost/non-vm_capable nodes
2935 for node in inst_config.all_nodes:
2936 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2937 instance, "instance lives on ghost node %s", node)
2938 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2939 instance, "instance lives on non-vm_capable node %s", node)
2941 feedback_fn("* Verifying orphan volumes")
2942 reserved = utils.FieldSet(*cluster.reserved_lvs)
2944 # We will get spurious "unknown volume" warnings if any node of this group
2945 # is secondary for an instance whose primary is in another group. To avoid
2946 # them, we find these instances and add their volumes to node_vol_should.
2947 for inst in self.all_inst_info.values():
2948 for secondary in inst.secondary_nodes:
2949 if (secondary in self.my_node_info
2950 and inst.name not in self.my_inst_info):
2951 inst.MapLVsByNode(node_vol_should)
2954 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2956 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2957 feedback_fn("* Verifying N+1 Memory redundancy")
2958 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2960 feedback_fn("* Other Notes")
2962 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2963 % len(i_non_redundant))
2965 if i_non_a_balanced:
2966 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2967 % len(i_non_a_balanced))
2970 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2973 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2977 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2978 """Analyze the post-hooks' result
2980 This method analyses the hook result, handles it, and sends some
2981 nicely-formatted feedback back to the user.
2983 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2984 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2985 @param hooks_results: the results of the multi-node hooks rpc call
2986 @param feedback_fn: function used to send feedback back to the caller
2987 @param lu_result: previous Exec result
2988 @return: the new Exec result, based on the previous result
2992 # We only really run POST phase hooks, only for non-empty groups,
2993 # and are only interested in their results
2994 if not self.my_node_names:
2997 elif phase == constants.HOOKS_PHASE_POST:
2998 # Used to change hooks' output to proper indentation
2999 feedback_fn("* Hooks Results")
3000 assert hooks_results, "invalid result from hooks"
3002 for node_name in hooks_results:
3003 res = hooks_results[node_name]
3005 test = msg and not res.offline
3006 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3007 "Communication failure in hooks execution: %s", msg)
3008 if res.offline or msg:
3009 # No need to investigate payload if node is offline or gave
3012 for script, hkr, output in res.payload:
3013 test = hkr == constants.HKR_FAIL
3014 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3015 "Script %s failed, output:", script)
3017 output = self._HOOKS_INDENT_RE.sub(" ", output)
3018 feedback_fn("%s" % output)
3024 class LUClusterVerifyDisks(NoHooksLU):
3025 """Verifies the cluster disks status.
3030 def ExpandNames(self):
3031 self.share_locks = _ShareAll()
3032 self.needed_locks = {
3033 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3036 def Exec(self, feedback_fn):
3037 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3039 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3040 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3041 for group in group_names])
3044 class LUGroupVerifyDisks(NoHooksLU):
3045 """Verifies the status of all disks in a node group.
3050 def ExpandNames(self):
3051 # Raises errors.OpPrereqError on its own if group can't be found
3052 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3054 self.share_locks = _ShareAll()
3055 self.needed_locks = {
3056 locking.LEVEL_INSTANCE: [],
3057 locking.LEVEL_NODEGROUP: [],
3058 locking.LEVEL_NODE: [],
3061 def DeclareLocks(self, level):
3062 if level == locking.LEVEL_INSTANCE:
3063 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3065 # Lock instances optimistically, needs verification once node and group
3066 # locks have been acquired
3067 self.needed_locks[locking.LEVEL_INSTANCE] = \
3068 self.cfg.GetNodeGroupInstances(self.group_uuid)
3070 elif level == locking.LEVEL_NODEGROUP:
3071 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3073 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3074 set([self.group_uuid] +
3075 # Lock all groups used by instances optimistically; this requires
3076 # going via the node before it's locked, requiring verification
3079 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3080 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3082 elif level == locking.LEVEL_NODE:
3083 # This will only lock the nodes in the group to be verified which contain
3085 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3086 self._LockInstancesNodes()
3088 # Lock all nodes in group to be verified
3089 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3090 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3091 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3093 def CheckPrereq(self):
3094 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3095 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3096 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3098 assert self.group_uuid in owned_groups
3100 # Check if locked instances are still correct
3101 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3103 # Get instance information
3104 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3106 # Check if node groups for locked instances are still correct
3107 for (instance_name, inst) in self.instances.items():
3108 assert owned_nodes.issuperset(inst.all_nodes), \
3109 "Instance %s's nodes changed while we kept the lock" % instance_name
3111 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3114 assert self.group_uuid in inst_groups, \
3115 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3117 def Exec(self, feedback_fn):
3118 """Verify integrity of cluster disks.
3120 @rtype: tuple of three items
3121 @return: a tuple of (dict of node-to-node_error, list of instances
3122 which need activate-disks, dict of instance: (node, volume) for missing volumes
3127 res_instances = set()
3130 nv_dict = _MapInstanceDisksToNodes([inst
3131 for inst in self.instances.values()
3135 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3136 set(self.cfg.GetVmCapableNodeList()))
3138 node_lvs = self.rpc.call_lv_list(nodes, [])
3140 for (node, node_res) in node_lvs.items():
3141 if node_res.offline:
3144 msg = node_res.fail_msg
3146 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3147 res_nodes[node] = msg
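# node_res.payload maps each LV name to a tuple whose last element is the
# online flag; an offline LV that belongs to a known instance means that
# instance needs its disks re-activated.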
3150 for lv_name, (_, _, lv_online) in node_res.payload.items():
3151 inst = nv_dict.pop((node, lv_name), None)
3152 if not (lv_online or inst is None):
3153 res_instances.add(inst)
3155 # any leftover items in nv_dict are missing LVs, let's arrange the data
3157 for key, inst in nv_dict.iteritems():
3158 res_missing.setdefault(inst, []).append(list(key))
3160 return (res_nodes, list(res_instances), res_missing)
3163 class LUClusterRepairDiskSizes(NoHooksLU):
3164 """Verifies the cluster disks sizes.
3169 def ExpandNames(self):
3170 if self.op.instances:
3171 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3172 self.needed_locks = {
3173 locking.LEVEL_NODE: [],
3174 locking.LEVEL_INSTANCE: self.wanted_names,
3176 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3178 self.wanted_names = None
3179 self.needed_locks = {
3180 locking.LEVEL_NODE: locking.ALL_SET,
3181 locking.LEVEL_INSTANCE: locking.ALL_SET,
3183 self.share_locks = _ShareAll()
3185 def DeclareLocks(self, level):
3186 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3187 self._LockInstancesNodes(primary_only=True)
3189 def CheckPrereq(self):
3190 """Check prerequisites.
3192 This only checks the optional instance list against the existing names.
3195 if self.wanted_names is None:
3196 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3198 self.wanted_instances = \
3199 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3201 def _EnsureChildSizes(self, disk):
3202 """Ensure children of the disk have the needed disk size.
3204 This is valid mainly for DRBD8 and fixes an issue where the
3205 children have smaller disk size.
3207 @param disk: an L{ganeti.objects.Disk} object
3210 if disk.dev_type == constants.LD_DRBD8:
3211 assert disk.children, "Empty children for DRBD8?"
3212 fchild = disk.children[0]
3213 mismatch = fchild.size < disk.size
3215 self.LogInfo("Child disk has size %d, parent %d, fixing",
3216 fchild.size, disk.size)
3217 fchild.size = disk.size
3219 # and we recurse on this child only, not on the metadev
3220 return self._EnsureChildSizes(fchild) or mismatch
3224 def Exec(self, feedback_fn):
3225 """Verify the size of cluster disks.
3228 # TODO: check child disks too
3229 # TODO: check differences in size between primary/secondary nodes
3231 for instance in self.wanted_instances:
3232 pnode = instance.primary_node
3233 if pnode not in per_node_disks:
3234 per_node_disks[pnode] = []
3235 for idx, disk in enumerate(instance.disks):
3236 per_node_disks[pnode].append((instance, idx, disk))
3239 for node, dskl in per_node_disks.items():
3240 newl = [v[2].Copy() for v in dskl]
3242 self.cfg.SetDiskID(dsk, node)
3243 result = self.rpc.call_blockdev_getsize(node, newl)
3245 self.LogWarning("Failure in blockdev_getsize call to node"
3246 " %s, ignoring", node)
3248 if len(result.payload) != len(dskl):
3249 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3250 " result.payload=%s", node, len(dskl), result.payload)
3251 self.LogWarning("Invalid result from node %s, ignoring node results",
3254 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3256 self.LogWarning("Disk %d of instance %s did not return size"
3257 " information, ignoring", idx, instance.name)
3259 if not isinstance(size, (int, long)):
3260 self.LogWarning("Disk %d of instance %s did not return valid"
3261 " size information, ignoring", idx, instance.name)
3264 if size != disk.size:
3265 self.LogInfo("Disk %d of instance %s has mismatched size,"
3266 " correcting: recorded %d, actual %d", idx,
3267 instance.name, disk.size, size)
3269 self.cfg.Update(instance, feedback_fn)
3270 changed.append((instance.name, idx, size))
3271 if self._EnsureChildSizes(disk):
3272 self.cfg.Update(instance, feedback_fn)
3273 changed.append((instance.name, idx, disk.size))
3277 class LUClusterRename(LogicalUnit):
3278 """Rename the cluster.
3281 HPATH = "cluster-rename"
3282 HTYPE = constants.HTYPE_CLUSTER
3284 def BuildHooksEnv(self):
3289 "OP_TARGET": self.cfg.GetClusterName(),
3290 "NEW_NAME": self.op.name,
3293 def BuildHooksNodes(self):
3294 """Build hooks nodes.
3297 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3299 def CheckPrereq(self):
3300 """Verify that the passed name is a valid one.
3303 hostname = netutils.GetHostname(name=self.op.name,
3304 family=self.cfg.GetPrimaryIPFamily())
3306 new_name = hostname.name
3307 self.ip = new_ip = hostname.ip
3308 old_name = self.cfg.GetClusterName()
3309 old_ip = self.cfg.GetMasterIP()
3310 if new_name == old_name and new_ip == old_ip:
3311 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3312 " cluster has changed",
3314 if new_ip != old_ip:
3315 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3316 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3317 " reachable on the network" %
3318 new_ip, errors.ECODE_NOTUNIQUE)
3320 self.op.name = new_name
3322 def Exec(self, feedback_fn):
3323 """Rename the cluster.
3326 clustername = self.op.name
3329 # shutdown the master IP
3330 (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
3331 result = self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev,
3333 result.Raise("Could not disable the master role")
3336 cluster = self.cfg.GetClusterInfo()
3337 cluster.cluster_name = clustername
3338 cluster.master_ip = new_ip
3339 self.cfg.Update(cluster, feedback_fn)
3341 # update the known hosts file
3342 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3343 node_list = self.cfg.GetOnlineNodeList()
3345 node_list.remove(master)
3348 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3350 result = self.rpc.call_node_activate_master_ip(master, new_ip, netmask,
3352 msg = result.fail_msg
3354 self.LogWarning("Could not re-enable the master role on"
3355 " the master, please restart manually: %s", msg)
3360 def _ValidateNetmask(cfg, netmask):
3361 """Checks if a netmask is valid.
3363 @type cfg: L{config.ConfigWriter}
3364 @param cfg: The cluster configuration
3366 @param netmask: the netmask to be verified
3367 @raise errors.OpPrereqError: if the validation fails
3370 ip_family = cfg.GetPrimaryIPFamily()
3372 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3373 except errors.ProgrammerError:
3374 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3376 if not ipcls.ValidateNetmask(netmask):
3377 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3381 class LUClusterSetParams(LogicalUnit):
3382 """Change the parameters of the cluster.
3385 HPATH = "cluster-modify"
3386 HTYPE = constants.HTYPE_CLUSTER
3389 def CheckArguments(self):
3393 if self.op.uid_pool:
3394 uidpool.CheckUidPool(self.op.uid_pool)
3396 if self.op.add_uids:
3397 uidpool.CheckUidPool(self.op.add_uids)
3399 if self.op.remove_uids:
3400 uidpool.CheckUidPool(self.op.remove_uids)
3402 if self.op.master_netmask is not None:
3403 _ValidateNetmask(self.cfg, self.op.master_netmask)
3405 def ExpandNames(self):
3406 # FIXME: in the future maybe other cluster params won't require checking on
3407 # all nodes to be modified.
3408 self.needed_locks = {
3409 locking.LEVEL_NODE: locking.ALL_SET,
3411 self.share_locks[locking.LEVEL_NODE] = 1
3413 def BuildHooksEnv(self):
3418 "OP_TARGET": self.cfg.GetClusterName(),
3419 "NEW_VG_NAME": self.op.vg_name,
3422 def BuildHooksNodes(self):
3423 """Build hooks nodes.
3426 mn = self.cfg.GetMasterNode()
3429 def CheckPrereq(self):
3430 """Check prerequisites.
3432 This checks that the given parameters don't conflict and
3433 that the given volume group is valid.
3436 if self.op.vg_name is not None and not self.op.vg_name:
3437 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3438 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3439 " instances exist", errors.ECODE_INVAL)
3441 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3442 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3443 raise errors.OpPrereqError("Cannot disable drbd helper while"
3444 " drbd-based instances exist",
3447 node_list = self.owned_locks(locking.LEVEL_NODE)
3449 # if vg_name not None, checks given volume group on all nodes
3451 vglist = self.rpc.call_vg_list(node_list)
3452 for node in node_list:
3453 msg = vglist[node].fail_msg
3455 # ignoring down node
3456 self.LogWarning("Error while gathering data on node %s"
3457 " (ignoring node): %s", node, msg)
3459 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3461 constants.MIN_VG_SIZE)
3463 raise errors.OpPrereqError("Error on node '%s': %s" %
3464 (node, vgstatus), errors.ECODE_ENVIRON)
3466 if self.op.drbd_helper:
3467 # checks given drbd helper on all nodes
3468 helpers = self.rpc.call_drbd_helper(node_list)
3469 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3471 self.LogInfo("Not checking drbd helper on offline node %s", node)
3473 msg = helpers[node].fail_msg
3475 raise errors.OpPrereqError("Error checking drbd helper on node"
3476 " '%s': %s" % (node, msg),
3477 errors.ECODE_ENVIRON)
3478 node_helper = helpers[node].payload
3479 if node_helper != self.op.drbd_helper:
3480 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3481 (node, node_helper), errors.ECODE_ENVIRON)
3483 self.cluster = cluster = self.cfg.GetClusterInfo()
3484 # validate params changes
3485 if self.op.beparams:
3486 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3487 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3489 if self.op.ndparams:
3490 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3491 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3493 # TODO: we need a more general way to handle resetting
3494 # cluster-level parameters to default values
3495 if self.new_ndparams["oob_program"] == "":
3496 self.new_ndparams["oob_program"] = \
3497 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3499 if self.op.nicparams:
3500 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3501 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3502 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3505 # check all instances for consistency
3506 for instance in self.cfg.GetAllInstancesInfo().values():
3507 for nic_idx, nic in enumerate(instance.nics):
3508 params_copy = copy.deepcopy(nic.nicparams)
3509 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3511 # check parameter syntax
3513 objects.NIC.CheckParameterSyntax(params_filled)
3514 except errors.ConfigurationError, err:
3515 nic_errors.append("Instance %s, nic/%d: %s" %
3516 (instance.name, nic_idx, err))
3518 # if we're moving instances to routed, check that they have an ip
3519 target_mode = params_filled[constants.NIC_MODE]
3520 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3521 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3522 " address" % (instance.name, nic_idx))
3524 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3525 "\n".join(nic_errors))
3527 # hypervisor list/parameters
3528 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3529 if self.op.hvparams:
3530 for hv_name, hv_dict in self.op.hvparams.items():
3531 if hv_name not in self.new_hvparams:
3532 self.new_hvparams[hv_name] = hv_dict
3534 self.new_hvparams[hv_name].update(hv_dict)
3536 # os hypervisor parameters
3537 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3539 for os_name, hvs in self.op.os_hvp.items():
3540 if os_name not in self.new_os_hvp:
3541 self.new_os_hvp[os_name] = hvs
3543 for hv_name, hv_dict in hvs.items():
3544 if hv_name not in self.new_os_hvp[os_name]:
3545 self.new_os_hvp[os_name][hv_name] = hv_dict
3547 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3550 self.new_osp = objects.FillDict(cluster.osparams, {})
3551 if self.op.osparams:
3552 for os_name, osp in self.op.osparams.items():
3553 if os_name not in self.new_osp:
3554 self.new_osp[os_name] = {}
3556 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3559 if not self.new_osp[os_name]:
3560 # we removed all parameters
3561 del self.new_osp[os_name]
3563 # check the parameter validity (remote check)
3564 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3565 os_name, self.new_osp[os_name])
3567 # changes to the hypervisor list
3568 if self.op.enabled_hypervisors is not None:
3569 self.hv_list = self.op.enabled_hypervisors
3570 for hv in self.hv_list:
3571 # if the hypervisor doesn't already exist in the cluster
3572 # hvparams, we initialize it to empty, and then (in both
3573 # cases) we make sure to fill the defaults, as we might not
3574 # have a complete defaults list if the hypervisor wasn't
3576 if hv not in new_hvp:
3578 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3579 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3581 self.hv_list = cluster.enabled_hypervisors
3583 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3584 # either the enabled list has changed, or the parameters have, validate
3585 for hv_name, hv_params in self.new_hvparams.items():
3586 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3587 (self.op.enabled_hypervisors and
3588 hv_name in self.op.enabled_hypervisors)):
3589 # either this is a new hypervisor, or its parameters have changed
3590 hv_class = hypervisor.GetHypervisor(hv_name)
3591 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3592 hv_class.CheckParameterSyntax(hv_params)
3593 _CheckHVParams(self, node_list, hv_name, hv_params)
3596 # no need to check any newly-enabled hypervisors, since the
3597 # defaults have already been checked in the above code-block
3598 for os_name, os_hvp in self.new_os_hvp.items():
3599 for hv_name, hv_params in os_hvp.items():
3600 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3601 # we need to fill in the new os_hvp on top of the actual hv_p
3602 cluster_defaults = self.new_hvparams.get(hv_name, {})
3603 new_osp = objects.FillDict(cluster_defaults, hv_params)
3604 hv_class = hypervisor.GetHypervisor(hv_name)
3605 hv_class.CheckParameterSyntax(new_osp)
3606 _CheckHVParams(self, node_list, hv_name, new_osp)
3608 if self.op.default_iallocator:
3609 alloc_script = utils.FindFile(self.op.default_iallocator,
3610 constants.IALLOCATOR_SEARCH_PATH,
3612 if alloc_script is None:
3613 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3614 " specified" % self.op.default_iallocator,
3617 def Exec(self, feedback_fn):
3618 """Change the parameters of the cluster.
3621 if self.op.vg_name is not None:
3622 new_volume = self.op.vg_name
3625 if new_volume != self.cfg.GetVGName():
3626 self.cfg.SetVGName(new_volume)
3628 feedback_fn("Cluster LVM configuration already in desired"
3629 " state, not changing")
3630 if self.op.drbd_helper is not None:
3631 new_helper = self.op.drbd_helper
3634 if new_helper != self.cfg.GetDRBDHelper():
3635 self.cfg.SetDRBDHelper(new_helper)
3637 feedback_fn("Cluster DRBD helper already in desired state,"
3639 if self.op.hvparams:
3640 self.cluster.hvparams = self.new_hvparams
3642 self.cluster.os_hvp = self.new_os_hvp
3643 if self.op.enabled_hypervisors is not None:
3644 self.cluster.hvparams = self.new_hvparams
3645 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3646 if self.op.beparams:
3647 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3648 if self.op.nicparams:
3649 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3650 if self.op.osparams:
3651 self.cluster.osparams = self.new_osp
3652 if self.op.ndparams:
3653 self.cluster.ndparams = self.new_ndparams
3655 if self.op.candidate_pool_size is not None:
3656 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3657 # we need to update the pool size here, otherwise the save will fail
3658 _AdjustCandidatePool(self, [])
3660 if self.op.maintain_node_health is not None:
3661 self.cluster.maintain_node_health = self.op.maintain_node_health
3663 if self.op.prealloc_wipe_disks is not None:
3664 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3666 if self.op.add_uids is not None:
3667 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3669 if self.op.remove_uids is not None:
3670 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3672 if self.op.uid_pool is not None:
3673 self.cluster.uid_pool = self.op.uid_pool
3675 if self.op.default_iallocator is not None:
3676 self.cluster.default_iallocator = self.op.default_iallocator
3678 if self.op.reserved_lvs is not None:
3679 self.cluster.reserved_lvs = self.op.reserved_lvs
3681 def helper_os(aname, mods, desc):
3683 lst = getattr(self.cluster, aname)
3684 for key, val in mods:
3685 if key == constants.DDM_ADD:
3687 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3690 elif key == constants.DDM_REMOVE:
3694 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3696 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3698 if self.op.hidden_os:
3699 helper_os("hidden_os", self.op.hidden_os, "hidden")
3701 if self.op.blacklisted_os:
3702 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3704 if self.op.master_netdev:
3705 (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
3706 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3707 self.cluster.master_netdev)
3708 result = self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev,
3710 result.Raise("Could not disable the master ip")
3711 feedback_fn("Changing master_netdev from %s to %s" %
3712 (dev, self.op.master_netdev))
3713 self.cluster.master_netdev = self.op.master_netdev
3715 if self.op.master_netmask:
3716 (master, ip, dev, old_netmask, _) = self.cfg.GetMasterNetworkParameters()
3717 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3718 result = self.rpc.call_node_change_master_netmask(master, old_netmask,
3719 self.op.master_netmask,
3722 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3723 self.LogWarning(msg)
3726 self.cluster.master_netmask = self.op.master_netmask
3728 self.cfg.Update(self.cluster, feedback_fn)
3730 if self.op.master_netdev:
3731 (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
3732 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3733 self.op.master_netdev)
3734 result = self.rpc.call_node_activate_master_ip(master, ip, netmask, dev,
3737 self.LogWarning("Could not re-enable the master ip on"
3738 " the master, please restart manually: %s",
3742 def _UploadHelper(lu, nodes, fname):
3743 """Helper for uploading a file and showing warnings.
3746 if os.path.exists(fname):
3747 result = lu.rpc.call_upload_file(nodes, fname)
3748 for to_node, to_result in result.items():
3749 msg = to_result.fail_msg
3751 msg = ("Copy of file %s to node %s failed: %s" %
3752 (fname, to_node, msg))
3753 lu.proc.LogWarning(msg)
3756 def _ComputeAncillaryFiles(cluster, redist):
3757 """Compute files external to Ganeti which need to be consistent.
3759 @type redist: boolean
3760 @param redist: Whether to include files which need to be redistributed
3763 # Compute files for all nodes
3765 constants.SSH_KNOWN_HOSTS_FILE,
3766 constants.CONFD_HMAC_KEY,
3767 constants.CLUSTER_DOMAIN_SECRET_FILE,
3768 constants.SPICE_CERT_FILE,
3769 constants.SPICE_CACERT_FILE,
3770 constants.RAPI_USERS_FILE,
3774 files_all.update(constants.ALL_CERT_FILES)
3775 files_all.update(ssconf.SimpleStore().GetFileList())
3777 # we need to ship at least the RAPI certificate
3778 files_all.add(constants.RAPI_CERT_FILE)
3780 if cluster.modify_etc_hosts:
3781 files_all.add(constants.ETC_HOSTS)
3783 # Files which are optional; these must:
3784 # - be present in one other category as well
3785 # - either exist or not exist on all nodes of that category (mc, vm all)
3787 constants.RAPI_USERS_FILE,
3790 # Files which should only be on master candidates
3793 files_mc.add(constants.CLUSTER_CONF_FILE)
3795 # Files which should only be on VM-capable nodes
3796 files_vm = set(filename
3797 for hv_name in cluster.enabled_hypervisors
3798 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3800 files_opt |= set(filename
3801 for hv_name in cluster.enabled_hypervisors
3802 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3804 # Filenames in each category must be unique
3805 all_files_set = files_all | files_mc | files_vm
3806 assert (len(all_files_set) ==
3807 sum(map(len, [files_all, files_mc, files_vm]))), \
3808 "Found file listed in more than one file list"
3810 # Optional files must be present in one other category
3811 assert all_files_set.issuperset(files_opt), \
3812 "Optional file not in a different required list"
3814 return (files_all, files_opt, files_mc, files_vm)
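# A small illustration (not from the original code, file names invented) of
# the two invariants asserted above: with
#   files_all = {"/etc/hosts", "known_hosts"}, files_mc = {"config.data"},
#   files_vm = {"kvm-vif-bridge"} and files_opt = {"/etc/hosts"},
# the union has four entries (no file is listed in two required categories)
# and every optional file also appears in one of the required sets.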
3817 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3818 """Distribute additional files which are part of the cluster configuration.
3820 ConfigWriter takes care of distributing the config and ssconf files, but
3821 there are more files which should be distributed to all nodes. This function
3822 makes sure those are copied.
3824 @param lu: calling logical unit
3825 @param additional_nodes: list of nodes not in the config to distribute to
3826 @type additional_vm: boolean
3827 @param additional_vm: whether the additional nodes are vm-capable or not
3830 # Gather target nodes
3831 cluster = lu.cfg.GetClusterInfo()
3832 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3834 online_nodes = lu.cfg.GetOnlineNodeList()
3835 vm_nodes = lu.cfg.GetVmCapableNodeList()
3837 if additional_nodes is not None:
3838 online_nodes.extend(additional_nodes)
3840 vm_nodes.extend(additional_nodes)
3842 # Never distribute to master node
3843 for nodelist in [online_nodes, vm_nodes]:
3844 if master_info.name in nodelist:
3845 nodelist.remove(master_info.name)
3848 (files_all, _, files_mc, files_vm) = \
3849 _ComputeAncillaryFiles(cluster, True)
3851 # Never re-distribute configuration file from here
3852 assert not (constants.CLUSTER_CONF_FILE in files_all or
3853 constants.CLUSTER_CONF_FILE in files_vm)
3854 assert not files_mc, "Master candidates not handled in this function"
3857 (online_nodes, files_all),
3858 (vm_nodes, files_vm),
3862 for (node_list, files) in filemap:
3864 _UploadHelper(lu, node_list, fname)
3867 class LUClusterRedistConf(NoHooksLU):
3868 """Force the redistribution of cluster configuration.
3870 This is a very simple LU.
3875 def ExpandNames(self):
3876 self.needed_locks = {
3877 locking.LEVEL_NODE: locking.ALL_SET,
3879 self.share_locks[locking.LEVEL_NODE] = 1
3881 def Exec(self, feedback_fn):
3882 """Redistribute the configuration.
3885 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3886 _RedistributeAncillaryFiles(self)
3889 class LUClusterActivateMasterIp(NoHooksLU):
3890 """Activate the master IP on the master node.
3893 def Exec(self, feedback_fn):
3894 """Activate the master IP.
3897 (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
3898 self.rpc.call_node_activate_master_ip(master, ip, netmask, dev, family)
3901 class LUClusterDeactivateMasterIp(NoHooksLU):
3902 """Deactivate the master IP on the master node.
3905 def Exec(self, feedback_fn):
3906 """Deactivate the master IP.
3909 (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
3910 self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev, family)
3913 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3914 """Sleep and poll for an instance's disk to sync.
3917 if not instance.disks or disks is not None and not disks:
3918 return True
3920 disks = _ExpandCheckDisks(instance, disks)
3923 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3925 node = instance.primary_node
3928 lu.cfg.SetDiskID(dev, node)
3930 # TODO: Convert to utils.Retry
3933 degr_retries = 10 # in seconds, as we sleep 1 second each time
3937 cumul_degraded = False
3938 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3939 msg = rstats.fail_msg
3941 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3944 raise errors.RemoteError("Can't contact node %s for mirror data,"
3945 " aborting." % node)
3948 rstats = rstats.payload
3950 for i, mstat in enumerate(rstats):
3952 lu.LogWarning("Can't compute data for node %s/%s",
3953 node, disks[i].iv_name)
3956 cumul_degraded = (cumul_degraded or
3957 (mstat.is_degraded and mstat.sync_percent is None))
3958 if mstat.sync_percent is not None:
3959 done = False
3960 if mstat.estimated_time is not None:
3961 rem_time = ("%s remaining (estimated)" %
3962 utils.FormatSeconds(mstat.estimated_time))
3963 max_time = mstat.estimated_time
3965 rem_time = "no time estimate"
3966 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3967 (disks[i].iv_name, mstat.sync_percent, rem_time))
3969 # if we're done but degraded, let's do a few small retries, to
3970 # make sure we see a stable and not transient situation; therefore
3971 # we force restart of the loop
3972 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3973 logging.info("Degraded disks found, %d retries left", degr_retries)
3981 time.sleep(min(60, max_time))
3984 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3985 return not cumul_degraded
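# Illustrative sketch only (not called anywhere): the "degraded retry" idea
# used by _WaitForSync above, with the RPC plumbing replaced by a
# hypothetical check_fn callable returning a (done, degraded) pair.
def _ExampleWaitUntilStable(check_fn, degr_retries=10, delay=1):
  """Polls check_fn until it reports completion with a stable state.

  When the state looks finished but still degraded, a few extra polls are
  done before trusting the result, mirroring the loop above.  Returns True
  if the final state is not degraded.

  """
  while True:
    (done, degraded) = check_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      time.sleep(delay)
      continue
    if done:
      return not degraded
    time.sleep(delay)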
3988 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3989 """Check that mirrors are not degraded.
3991 The ldisk parameter, if True, will change the test from the
3992 is_degraded attribute (which represents overall non-ok status for
3993 the device(s)) to the ldisk (representing the local storage status).
3996 lu.cfg.SetDiskID(dev, node)
4000 if on_primary or dev.AssembleOnSecondary():
4001 rstats = lu.rpc.call_blockdev_find(node, dev)
4002 msg = rstats.fail_msg
4004 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4006 elif not rstats.payload:
4007 lu.LogWarning("Can't find disk on node %s", node)
4011 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4013 result = result and not rstats.payload.is_degraded
4016 for child in dev.children:
4017 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4022 class LUOobCommand(NoHooksLU):
4023 """Logical unit for OOB handling.
4027 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4029 def ExpandNames(self):
4030 """Gather locks we need.
4033 if self.op.node_names:
4034 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4035 lock_names = self.op.node_names
4036 else:
4037 lock_names = locking.ALL_SET
4039 self.needed_locks = {
4040 locking.LEVEL_NODE: lock_names,
4043 def CheckPrereq(self):
4044 """Check prerequisites.
4047 - the node exists in the configuration
4050 Any errors are signaled by raising errors.OpPrereqError.
4054 self.master_node = self.cfg.GetMasterNode()
4056 assert self.op.power_delay >= 0.0
4058 if self.op.node_names:
4059 if (self.op.command in self._SKIP_MASTER and
4060 self.master_node in self.op.node_names):
4061 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4062 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4064 if master_oob_handler:
4065 additional_text = ("run '%s %s %s' if you want to operate on the"
4066 " master regardless") % (master_oob_handler,
4070 additional_text = "it does not support out-of-band operations"
4072 raise errors.OpPrereqError(("Operating on the master node %s is not"
4073 " allowed for %s; %s") %
4074 (self.master_node, self.op.command,
4075 additional_text), errors.ECODE_INVAL)
4076 else:
4077 self.op.node_names = self.cfg.GetNodeList()
4078 if self.op.command in self._SKIP_MASTER:
4079 self.op.node_names.remove(self.master_node)
4081 if self.op.command in self._SKIP_MASTER:
4082 assert self.master_node not in self.op.node_names
4084 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4085 if node is None:
4086 raise errors.OpPrereqError("Node %s not found" % node_name,
4087 errors.ECODE_NOENT)
4088 else:
4089 self.nodes.append(node)
4091 if (not self.op.ignore_status and
4092 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4093 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4094 " not marked offline") % node_name,
4097 def Exec(self, feedback_fn):
4098 """Execute OOB and return result if we expect any.
4101 master_node = self.master_node
4104 for idx, node in enumerate(utils.NiceSort(self.nodes,
4105 key=lambda node: node.name)):
4106 node_entry = [(constants.RS_NORMAL, node.name)]
4107 ret.append(node_entry)
4109 oob_program = _SupportsOob(self.cfg, node)
4112 node_entry.append((constants.RS_UNAVAIL, None))
4115 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4116 self.op.command, oob_program, node.name)
4117 result = self.rpc.call_run_oob(master_node, oob_program,
4118 self.op.command, node.name,
4122 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4123 node.name, result.fail_msg)
4124 node_entry.append((constants.RS_NODATA, None))
4127 self._CheckPayload(result)
4128 except errors.OpExecError, err:
4129 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4131 node_entry.append((constants.RS_NODATA, None))
4133 if self.op.command == constants.OOB_HEALTH:
4134 # For health we should log important events
4135 for item, status in result.payload:
4136 if status in [constants.OOB_STATUS_WARNING,
4137 constants.OOB_STATUS_CRITICAL]:
4138 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4139 item, node.name, status)
4141 if self.op.command == constants.OOB_POWER_ON:
4142 node.powered = True
4143 elif self.op.command == constants.OOB_POWER_OFF:
4144 node.powered = False
4145 elif self.op.command == constants.OOB_POWER_STATUS:
4146 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4147 if powered != node.powered:
4148 logging.warning(("Recorded power state (%s) of node '%s' does not"
4149 " match actual power state (%s)"), node.powered,
4152 # For configuration changing commands we should update the node
4153 if self.op.command in (constants.OOB_POWER_ON,
4154 constants.OOB_POWER_OFF):
4155 self.cfg.Update(node, feedback_fn)
4157 node_entry.append((constants.RS_NORMAL, result.payload))
4159 if (self.op.command == constants.OOB_POWER_ON and
4160 idx < len(self.nodes) - 1):
4161 time.sleep(self.op.power_delay)
4165 def _CheckPayload(self, result):
4166 """Checks if the payload is valid.
4168 @param result: RPC result
4169 @raises errors.OpExecError: If payload is not valid
4173 if self.op.command == constants.OOB_HEALTH:
4174 if not isinstance(result.payload, list):
4175 errs.append("command 'health' is expected to return a list but got %s" %
4176 type(result.payload))
4178 for item, status in result.payload:
4179 if status not in constants.OOB_STATUSES:
4180 errs.append("health item '%s' has invalid status '%s'" %
4183 if self.op.command == constants.OOB_POWER_STATUS:
4184 if not isinstance(result.payload, dict):
4185 errs.append("power-status is expected to return a dict but got %s" %
4186 type(result.payload))
4188 if self.op.command in [
4189 constants.OOB_POWER_ON,
4190 constants.OOB_POWER_OFF,
4191 constants.OOB_POWER_CYCLE,
4193 if result.payload is not None:
4194 errs.append("%s is expected to not return payload but got '%s'" %
4195 (self.op.command, result.payload))
4198 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4199 utils.CommaJoin(errs))
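# In short, the checks above encode the expected payload per OOB command:
# "health" must return a list of (item, status) pairs with known statuses,
# "power-status" must return a dictionary (carrying the "powered" flag used
# by Exec), and power-on/power-off/power-cycle must return no payload at all.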
4202 class _OsQuery(_QueryBase):
4203 FIELDS = query.OS_FIELDS
4205 def ExpandNames(self, lu):
4206 # Lock all nodes in shared mode
4207 # Temporary removal of locks, should be reverted later
4208 # TODO: reintroduce locks when they are lighter-weight
4209 lu.needed_locks = {}
4210 #self.share_locks[locking.LEVEL_NODE] = 1
4211 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4213 # The following variables interact with _QueryBase._GetNames
4215 self.wanted = self.names
4217 self.wanted = locking.ALL_SET
4219 self.do_locking = self.use_locking
4221 def DeclareLocks(self, lu, level):
4225 def _DiagnoseByOS(rlist):
4226 """Remaps a per-node return list into an a per-os per-node dictionary
4228 @param rlist: a map with node names as keys and OS objects as values
4231 @return: a dictionary with osnames as keys and as value another
4232 map, with nodes as keys and tuples of (path, status, diagnose,
4233 variants, parameters, api_versions) as values, eg::
4235 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4236 (/srv/..., False, "invalid api")],
4237 "node2": [(/srv/..., True, "", [], [])]}
4242 # we build here the list of nodes that didn't fail the RPC (at RPC
4243 # level), so that nodes with a non-responding node daemon don't
4244 # make all OSes invalid
4245 good_nodes = [node_name for node_name in rlist
4246 if not rlist[node_name].fail_msg]
4247 for node_name, nr in rlist.items():
4248 if nr.fail_msg or not nr.payload:
4249 continue
4250 for (name, path, status, diagnose, variants,
4251 params, api_versions) in nr.payload:
4252 if name not in all_os:
4253 # build a list of nodes for this os containing empty lists
4254 # for each node in node_list
4255 all_os[name] = {}
4256 for nname in good_nodes:
4257 all_os[name][nname] = []
4258 # convert params from [name, help] to (name, help)
4259 params = [tuple(v) for v in params]
4260 all_os[name][node_name].append((path, status, diagnose,
4261 variants, params, api_versions))
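# A minimal example of the remapping performed above, with plain values
# instead of RPC results (OS names and paths invented):
#   rlist = {"node1.example.com": [("lenny-image", "/srv/ganeti/os", ...)],
#            "node2.example.com": [("lenny-image", "/usr/share/ganeti/os", ...)]}
# becomes
#   {"lenny-image": {"node1.example.com": [("/srv/ganeti/os", ...)],
#                    "node2.example.com": [("/usr/share/ganeti/os", ...)]}}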
4264 def _GetQueryData(self, lu):
4265 """Computes the list of nodes and their attributes.
4268 # Locking is not used
4269 assert not (compat.any(lu.glm.is_owned(level)
4270 for level in locking.LEVELS
4271 if level != locking.LEVEL_CLUSTER) or
4272 self.do_locking or self.use_locking)
4274 valid_nodes = [node.name
4275 for node in lu.cfg.GetAllNodesInfo().values()
4276 if not node.offline and node.vm_capable]
4277 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4278 cluster = lu.cfg.GetClusterInfo()
4282 for (os_name, os_data) in pol.items():
4283 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4284 hidden=(os_name in cluster.hidden_os),
4285 blacklisted=(os_name in cluster.blacklisted_os))
4289 api_versions = set()
4291 for idx, osl in enumerate(os_data.values()):
4292 info.valid = bool(info.valid and osl and osl[0][1])
4296 (node_variants, node_params, node_api) = osl[0][3:6]
4299 variants.update(node_variants)
4300 parameters.update(node_params)
4301 api_versions.update(node_api)
4303 # Filter out inconsistent values
4304 variants.intersection_update(node_variants)
4305 parameters.intersection_update(node_params)
4306 api_versions.intersection_update(node_api)
4308 info.variants = list(variants)
4309 info.parameters = list(parameters)
4310 info.api_versions = list(api_versions)
4312 data[os_name] = info
4314 # Prepare data in requested order
4315 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4319 class LUOsDiagnose(NoHooksLU):
4320 """Logical unit for OS diagnose/query.
4326 def _BuildFilter(fields, names):
4327 """Builds a filter for querying OSes.
4330 name_filter = qlang.MakeSimpleFilter("name", names)
4332 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4333 # respective field is not requested
4334 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4335 for fname in ["hidden", "blacklisted"]
4336 if fname not in fields]
4337 if "valid" not in fields:
4338 status_filter.append([qlang.OP_TRUE, "valid"])
4341 status_filter.insert(0, qlang.OP_AND)
4343 status_filter = None
4345 if name_filter and status_filter:
4346 return [qlang.OP_AND, name_filter, status_filter]
4350 return status_filter
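# For reference, when none of "hidden", "blacklisted" or "valid" is in the
# requested fields the status filter built above has the shape
#   [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#            [OP_NOT, [OP_TRUE, "blacklisted"]],
#            [OP_TRUE, "valid"]]
# so only visible, non-blacklisted and valid OSes are returned by default.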
4352 def CheckArguments(self):
4353 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4354 self.op.output_fields, False)
4356 def ExpandNames(self):
4357 self.oq.ExpandNames(self)
4359 def Exec(self, feedback_fn):
4360 return self.oq.OldStyleQuery(self)
4363 class LUNodeRemove(LogicalUnit):
4364 """Logical unit for removing a node.
4367 HPATH = "node-remove"
4368 HTYPE = constants.HTYPE_NODE
4370 def BuildHooksEnv(self):
4373 This doesn't run on the target node in the pre phase as a failed
4374 node would then be impossible to remove.
4378 "OP_TARGET": self.op.node_name,
4379 "NODE_NAME": self.op.node_name,
4382 def BuildHooksNodes(self):
4383 """Build hooks nodes.
4386 all_nodes = self.cfg.GetNodeList()
4388 all_nodes.remove(self.op.node_name)
4390 logging.warning("Node '%s', which is about to be removed, was not found"
4391 " in the list of all nodes", self.op.node_name)
4392 return (all_nodes, all_nodes)
4394 def CheckPrereq(self):
4395 """Check prerequisites.
4398 - the node exists in the configuration
4399 - it does not have primary or secondary instances
4400 - it's not the master
4402 Any errors are signaled by raising errors.OpPrereqError.
4405 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4406 node = self.cfg.GetNodeInfo(self.op.node_name)
4407 assert node is not None
4409 masternode = self.cfg.GetMasterNode()
4410 if node.name == masternode:
4411 raise errors.OpPrereqError("Node is the master node, failover to another"
4412 " node is required", errors.ECODE_INVAL)
4414 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4415 if node.name in instance.all_nodes:
4416 raise errors.OpPrereqError("Instance %s is still running on the node,"
4417 " please remove first" % instance_name,
4419 self.op.node_name = node.name
4422 def Exec(self, feedback_fn):
4423 """Removes the node from the cluster.
4427 logging.info("Stopping the node daemon and removing configs from node %s",
4430 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4432 # Promote nodes to master candidate as needed
4433 _AdjustCandidatePool(self, exceptions=[node.name])
4434 self.context.RemoveNode(node.name)
4436 # Run post hooks on the node before it's removed
4437 _RunPostHook(self, node.name)
4439 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4440 msg = result.fail_msg
4442 self.LogWarning("Errors encountered on the remote node while leaving"
4443 " the cluster: %s", msg)
4445 # Remove node from our /etc/hosts
4446 if self.cfg.GetClusterInfo().modify_etc_hosts:
4447 master_node = self.cfg.GetMasterNode()
4448 result = self.rpc.call_etc_hosts_modify(master_node,
4449 constants.ETC_HOSTS_REMOVE,
4451 result.Raise("Can't update hosts file with new host data")
4452 _RedistributeAncillaryFiles(self)
4455 class _NodeQuery(_QueryBase):
4456 FIELDS = query.NODE_FIELDS
4458 def ExpandNames(self, lu):
4459 lu.needed_locks = {}
4460 lu.share_locks = _ShareAll()
4463 self.wanted = _GetWantedNodes(lu, self.names)
4465 self.wanted = locking.ALL_SET
4467 self.do_locking = (self.use_locking and
4468 query.NQ_LIVE in self.requested_data)
4471 # If any non-static field is requested we need to lock the nodes
4472 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4474 def DeclareLocks(self, lu, level):
4477 def _GetQueryData(self, lu):
4478 """Computes the list of nodes and their attributes.
4481 all_info = lu.cfg.GetAllNodesInfo()
4483 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4485 # Gather data as requested
4486 if query.NQ_LIVE in self.requested_data:
4487 # filter out non-vm_capable nodes
4488 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4490 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4491 lu.cfg.GetHypervisorType())
4492 live_data = dict((name, nresult.payload)
4493 for (name, nresult) in node_data.items()
4494 if not nresult.fail_msg and nresult.payload)
4498 if query.NQ_INST in self.requested_data:
4499 node_to_primary = dict([(name, set()) for name in nodenames])
4500 node_to_secondary = dict([(name, set()) for name in nodenames])
4502 inst_data = lu.cfg.GetAllInstancesInfo()
4504 for inst in inst_data.values():
4505 if inst.primary_node in node_to_primary:
4506 node_to_primary[inst.primary_node].add(inst.name)
4507 for secnode in inst.secondary_nodes:
4508 if secnode in node_to_secondary:
4509 node_to_secondary[secnode].add(inst.name)
4511 node_to_primary = None
4512 node_to_secondary = None
4514 if query.NQ_OOB in self.requested_data:
4515 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4516 for name, node in all_info.iteritems())
4520 if query.NQ_GROUP in self.requested_data:
4521 groups = lu.cfg.GetAllNodeGroupsInfo()
4525 return query.NodeQueryData([all_info[name] for name in nodenames],
4526 live_data, lu.cfg.GetMasterNode(),
4527 node_to_primary, node_to_secondary, groups,
4528 oob_support, lu.cfg.GetClusterInfo())
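# Illustrative sketch only (not used by _NodeQuery): the node-to-instance
# maps built in _GetQueryData above, shown with plain tuples instead of
# objects.Instance; the tuple layout is an assumption for the example.
def _ExampleNodeInstanceMaps(instances):
  """Returns (node_to_primary, node_to_secondary) dicts of instance-name sets.

  Each element of C{instances} is assumed to be a
  (name, primary_node, secondary_nodes) tuple.

  """
  node_to_primary = {}
  node_to_secondary = {}
  for (name, primary, secondaries) in instances:
    node_to_primary.setdefault(primary, set()).add(name)
    for secnode in secondaries:
      node_to_secondary.setdefault(secnode, set()).add(name)
  return (node_to_primary, node_to_secondary)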
4531 class LUNodeQuery(NoHooksLU):
4532 """Logical unit for querying nodes.
4535 # pylint: disable=W0142
4538 def CheckArguments(self):
4539 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4540 self.op.output_fields, self.op.use_locking)
4542 def ExpandNames(self):
4543 self.nq.ExpandNames(self)
4545 def Exec(self, feedback_fn):
4546 return self.nq.OldStyleQuery(self)
4549 class LUNodeQueryvols(NoHooksLU):
4550 """Logical unit for getting volumes on node(s).
4554 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4555 _FIELDS_STATIC = utils.FieldSet("node")
4557 def CheckArguments(self):
4558 _CheckOutputFields(static=self._FIELDS_STATIC,
4559 dynamic=self._FIELDS_DYNAMIC,
4560 selected=self.op.output_fields)
4562 def ExpandNames(self):
4563 self.needed_locks = {}
4564 self.share_locks[locking.LEVEL_NODE] = 1
4565 if not self.op.nodes:
4566 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4568 self.needed_locks[locking.LEVEL_NODE] = \
4569 _GetWantedNodes(self, self.op.nodes)
4571 def Exec(self, feedback_fn):
4572 """Computes the list of nodes and their attributes.
4575 nodenames = self.owned_locks(locking.LEVEL_NODE)
4576 volumes = self.rpc.call_node_volumes(nodenames)
4578 ilist = self.cfg.GetAllInstancesInfo()
4579 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4582 for node in nodenames:
4583 nresult = volumes[node]
4586 msg = nresult.fail_msg
4588 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4591 node_vols = sorted(nresult.payload,
4592 key=operator.itemgetter("dev"))
4594 for vol in node_vols:
4596 for field in self.op.output_fields:
4597 if field == "node":
4598 val = node
4599 elif field == "phys":
4600 val = vol["dev"]
4601 elif field == "vg":
4602 val = vol["vg"]
4603 elif field == "name":
4604 val = vol["name"]
4605 elif field == "size":
4606 val = int(float(vol["size"]))
4607 elif field == "instance":
4608 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4609 else:
4610 raise errors.ParameterError(field)
4611 node_output.append(str(val))
4613 output.append(node_output)
4618 class LUNodeQueryStorage(NoHooksLU):
4619 """Logical unit for getting information on storage units on node(s).
4622 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4625 def CheckArguments(self):
4626 _CheckOutputFields(static=self._FIELDS_STATIC,
4627 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4628 selected=self.op.output_fields)
4630 def ExpandNames(self):
4631 self.needed_locks = {}
4632 self.share_locks[locking.LEVEL_NODE] = 1
4635 self.needed_locks[locking.LEVEL_NODE] = \
4636 _GetWantedNodes(self, self.op.nodes)
4638 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4640 def Exec(self, feedback_fn):
4641 """Computes the list of nodes and their attributes.
4644 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4646 # Always get name to sort by
4647 if constants.SF_NAME in self.op.output_fields:
4648 fields = self.op.output_fields[:]
4650 fields = [constants.SF_NAME] + self.op.output_fields
4652 # Never ask for node or type as it's only known to the LU
4653 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4654 while extra in fields:
4655 fields.remove(extra)
4657 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4658 name_idx = field_idx[constants.SF_NAME]
4660 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4661 data = self.rpc.call_storage_list(self.nodes,
4662 self.op.storage_type, st_args,
4663 self.op.name, fields)
4667 for node in utils.NiceSort(self.nodes):
4668 nresult = data[node]
4672 msg = nresult.fail_msg
4674 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4677 rows = dict([(row[name_idx], row) for row in nresult.payload])
4679 for name in utils.NiceSort(rows.keys()):
4684 for field in self.op.output_fields:
4685 if field == constants.SF_NODE:
4686 val = node
4687 elif field == constants.SF_TYPE:
4688 val = self.op.storage_type
4689 elif field in field_idx:
4690 val = row[field_idx[field]]
4691 else:
4692 raise errors.ParameterError(field)
4701 class _InstanceQuery(_QueryBase):
4702 FIELDS = query.INSTANCE_FIELDS
4704 def ExpandNames(self, lu):
4705 lu.needed_locks = {}
4706 lu.share_locks = _ShareAll()
4709 self.wanted = _GetWantedInstances(lu, self.names)
4711 self.wanted = locking.ALL_SET
4713 self.do_locking = (self.use_locking and
4714 query.IQ_LIVE in self.requested_data)
4716 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4717 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4718 lu.needed_locks[locking.LEVEL_NODE] = []
4719 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4721 self.do_grouplocks = (self.do_locking and
4722 query.IQ_NODES in self.requested_data)
4724 def DeclareLocks(self, lu, level):
4726 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4727 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4729 # Lock all groups used by instances optimistically; this requires going
4730 # via the node before it's locked, requiring verification later on
4731 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4733 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4734 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4735 elif level == locking.LEVEL_NODE:
4736 lu._LockInstancesNodes() # pylint: disable=W0212
4739 def _CheckGroupLocks(lu):
4740 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4741 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4743 # Check if node groups for locked instances are still correct
4744 for instance_name in owned_instances:
4745 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4747 def _GetQueryData(self, lu):
4748 """Computes the list of instances and their attributes.
4751 if self.do_grouplocks:
4752 self._CheckGroupLocks(lu)
4754 cluster = lu.cfg.GetClusterInfo()
4755 all_info = lu.cfg.GetAllInstancesInfo()
4757 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4759 instance_list = [all_info[name] for name in instance_names]
4760 nodes = frozenset(itertools.chain(*(inst.all_nodes
4761 for inst in instance_list)))
4762 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4765 wrongnode_inst = set()
4767 # Gather data as requested
4768 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4770 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4772 result = node_data[name]
4774 # offline nodes will be in both lists
4775 assert result.fail_msg
4776 offline_nodes.append(name)
4778 bad_nodes.append(name)
4779 elif result.payload:
4780 for inst in result.payload:
4781 if inst in all_info:
4782 if all_info[inst].primary_node == name:
4783 live_data.update(result.payload)
4784 else:
4785 wrongnode_inst.add(inst)
4787 # orphan instance; we don't list it here as we don't
4788 # handle this case yet in the output of instance listing
4789 logging.warning("Orphan instance '%s' found on node %s",
4790 inst, name)
4791 # else no instance is alive
4795 if query.IQ_DISKUSAGE in self.requested_data:
4796 disk_usage = dict((inst.name,
4797 _ComputeDiskSize(inst.disk_template,
4798 [{constants.IDISK_SIZE: disk.size}
4799 for disk in inst.disks]))
4800 for inst in instance_list)
4804 if query.IQ_CONSOLE in self.requested_data:
4806 for inst in instance_list:
4807 if inst.name in live_data:
4808 # Instance is running
4809 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4811 consinfo[inst.name] = None
4812 assert set(consinfo.keys()) == set(instance_names)
4816 if query.IQ_NODES in self.requested_data:
4817 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4819 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4820 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4821 for uuid in set(map(operator.attrgetter("group"),
4827 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4828 disk_usage, offline_nodes, bad_nodes,
4829 live_data, wrongnode_inst, consinfo,
4833 class LUQuery(NoHooksLU):
4834 """Query for resources/items of a certain kind.
4837 # pylint: disable=W0142
4840 def CheckArguments(self):
4841 qcls = _GetQueryImplementation(self.op.what)
4843 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4845 def ExpandNames(self):
4846 self.impl.ExpandNames(self)
4848 def DeclareLocks(self, level):
4849 self.impl.DeclareLocks(self, level)
4851 def Exec(self, feedback_fn):
4852 return self.impl.NewStyleQuery(self)
4855 class LUQueryFields(NoHooksLU):
4856 """Query for resources/items of a certain kind.
4859 # pylint: disable=W0142
4862 def CheckArguments(self):
4863 self.qcls = _GetQueryImplementation(self.op.what)
4865 def ExpandNames(self):
4866 self.needed_locks = {}
4868 def Exec(self, feedback_fn):
4869 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4872 class LUNodeModifyStorage(NoHooksLU):
4873 """Logical unit for modifying a storage volume on a node.
4878 def CheckArguments(self):
4879 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4881 storage_type = self.op.storage_type
4884 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4886 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4887 " modified" % storage_type,
4890 diff = set(self.op.changes.keys()) - modifiable
4892 raise errors.OpPrereqError("The following fields can not be modified for"
4893 " storage units of type '%s': %r" %
4894 (storage_type, list(diff)),
4897 def ExpandNames(self):
4898 self.needed_locks = {
4899 locking.LEVEL_NODE: self.op.node_name,
4902 def Exec(self, feedback_fn):
4903 """Computes the list of nodes and their attributes.
4906 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4907 result = self.rpc.call_storage_modify(self.op.node_name,
4908 self.op.storage_type, st_args,
4909 self.op.name, self.op.changes)
4910 result.Raise("Failed to modify storage unit '%s' on %s" %
4911 (self.op.name, self.op.node_name))
4914 class LUNodeAdd(LogicalUnit):
4915 """Logical unit for adding node to the cluster.
4919 HTYPE = constants.HTYPE_NODE
4920 _NFLAGS = ["master_capable", "vm_capable"]
4922 def CheckArguments(self):
4923 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4924 # validate/normalize the node name
4925 self.hostname = netutils.GetHostname(name=self.op.node_name,
4926 family=self.primary_ip_family)
4927 self.op.node_name = self.hostname.name
4929 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4930 raise errors.OpPrereqError("Cannot readd the master node",
4933 if self.op.readd and self.op.group:
4934 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4935 " being readded", errors.ECODE_INVAL)
4937 def BuildHooksEnv(self):
4940 This will run on all nodes before, and on all nodes + the new node after.
4944 "OP_TARGET": self.op.node_name,
4945 "NODE_NAME": self.op.node_name,
4946 "NODE_PIP": self.op.primary_ip,
4947 "NODE_SIP": self.op.secondary_ip,
4948 "MASTER_CAPABLE": str(self.op.master_capable),
4949 "VM_CAPABLE": str(self.op.vm_capable),
4952 def BuildHooksNodes(self):
4953 """Build hooks nodes.
4956 # Exclude added node
4957 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4958 post_nodes = pre_nodes + [self.op.node_name, ]
4960 return (pre_nodes, post_nodes)
4962 def CheckPrereq(self):
4963 """Check prerequisites.
4966 - the new node is not already in the config
4968 - its parameters (single/dual homed) matches the cluster
4970 Any errors are signaled by raising errors.OpPrereqError.
4974 hostname = self.hostname
4975 node = hostname.name
4976 primary_ip = self.op.primary_ip = hostname.ip
4977 if self.op.secondary_ip is None:
4978 if self.primary_ip_family == netutils.IP6Address.family:
4979 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4980 " IPv4 address must be given as secondary",
4982 self.op.secondary_ip = primary_ip
4984 secondary_ip = self.op.secondary_ip
4985 if not netutils.IP4Address.IsValid(secondary_ip):
4986 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4987 " address" % secondary_ip, errors.ECODE_INVAL)
4989 node_list = cfg.GetNodeList()
4990 if not self.op.readd and node in node_list:
4991 raise errors.OpPrereqError("Node %s is already in the configuration" %
4992 node, errors.ECODE_EXISTS)
4993 elif self.op.readd and node not in node_list:
4994 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4997 self.changed_primary_ip = False
4999 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5000 if self.op.readd and node == existing_node_name:
5001 if existing_node.secondary_ip != secondary_ip:
5002 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5003 " address configuration as before",
5005 if existing_node.primary_ip != primary_ip:
5006 self.changed_primary_ip = True
5010 if (existing_node.primary_ip == primary_ip or
5011 existing_node.secondary_ip == primary_ip or
5012 existing_node.primary_ip == secondary_ip or
5013 existing_node.secondary_ip == secondary_ip):
5014 raise errors.OpPrereqError("New node ip address(es) conflict with"
5015 " existing node %s" % existing_node.name,
5016 errors.ECODE_NOTUNIQUE)
5018 # After this 'if' block, None is no longer a valid value for the
5019 # _capable op attributes
5021 old_node = self.cfg.GetNodeInfo(node)
5022 assert old_node is not None, "Can't retrieve locked node %s" % node
5023 for attr in self._NFLAGS:
5024 if getattr(self.op, attr) is None:
5025 setattr(self.op, attr, getattr(old_node, attr))
5027 for attr in self._NFLAGS:
5028 if getattr(self.op, attr) is None:
5029 setattr(self.op, attr, True)
5031 if self.op.readd and not self.op.vm_capable:
5032 pri, sec = cfg.GetNodeInstances(node)
5034 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5035 " flag set to false, but it already holds"
5036 " instances" % node,
5039 # check that the type of the node (single versus dual homed) is the
5040 # same as for the master
5041 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5042 master_singlehomed = myself.secondary_ip == myself.primary_ip
5043 newbie_singlehomed = secondary_ip == primary_ip
5044 if master_singlehomed != newbie_singlehomed:
5045 if master_singlehomed:
5046 raise errors.OpPrereqError("The master has no secondary ip but the"
5047 " new node has one",
5050 raise errors.OpPrereqError("The master has a secondary ip but the"
5051 " new node doesn't have one",
5054 # checks reachability
5055 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5056 raise errors.OpPrereqError("Node not reachable by ping",
5057 errors.ECODE_ENVIRON)
5059 if not newbie_singlehomed:
5060 # check reachability from my secondary ip to newbie's secondary ip
5061 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5062 source=myself.secondary_ip):
5063 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5064 " based ping to node daemon port",
5065 errors.ECODE_ENVIRON)
5072 if self.op.master_capable:
5073 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5075 self.master_candidate = False
5078 self.new_node = old_node
5080 node_group = cfg.LookupNodeGroup(self.op.group)
5081 self.new_node = objects.Node(name=node,
5082 primary_ip=primary_ip,
5083 secondary_ip=secondary_ip,
5084 master_candidate=self.master_candidate,
5085 offline=False, drained=False,
5088 if self.op.ndparams:
5089 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5091 def Exec(self, feedback_fn):
5092 """Adds the new node to the cluster.
5095 new_node = self.new_node
5096 node = new_node.name
5098 # We are adding a new node, so we assume it's powered
5099 new_node.powered = True
5101 # for re-adds, reset the offline/drained/master-candidate flags;
5102 # we need to reset here, otherwise offline would prevent RPC calls
5103 # later in the procedure; this also means that if the re-add
5104 # fails, we are left with a non-offlined, broken node
5106 new_node.drained = new_node.offline = False # pylint: disable=W0201
5107 self.LogInfo("Readding a node, the offline/drained flags were reset")
5108 # if we demote the node, we do cleanup later in the procedure
5109 new_node.master_candidate = self.master_candidate
5110 if self.changed_primary_ip:
5111 new_node.primary_ip = self.op.primary_ip
5113 # copy the master/vm_capable flags
5114 for attr in self._NFLAGS:
5115 setattr(new_node, attr, getattr(self.op, attr))
5117 # notify the user about any possible mc promotion
5118 if new_node.master_candidate:
5119 self.LogInfo("Node will be a master candidate")
5121 if self.op.ndparams:
5122 new_node.ndparams = self.op.ndparams
5124 new_node.ndparams = {}
5126 # check connectivity
5127 result = self.rpc.call_version([node])[node]
5128 result.Raise("Can't get version information from node %s" % node)
5129 if constants.PROTOCOL_VERSION == result.payload:
5130 logging.info("Communication to node %s fine, sw version %s match",
5131 node, result.payload)
5133 raise errors.OpExecError("Version mismatch master version %s,"
5134 " node version %s" %
5135 (constants.PROTOCOL_VERSION, result.payload))
5137 # Add node to our /etc/hosts, and add key to known_hosts
5138 if self.cfg.GetClusterInfo().modify_etc_hosts:
5139 master_node = self.cfg.GetMasterNode()
5140 result = self.rpc.call_etc_hosts_modify(master_node,
5141 constants.ETC_HOSTS_ADD,
5144 result.Raise("Can't update hosts file with new host data")
5146 if new_node.secondary_ip != new_node.primary_ip:
5147 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5150 node_verify_list = [self.cfg.GetMasterNode()]
5151 node_verify_param = {
5152 constants.NV_NODELIST: ([node], {}),
5153 # TODO: do a node-net-test as well?
5156 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5157 self.cfg.GetClusterName())
5158 for verifier in node_verify_list:
5159 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5160 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5162 for failed in nl_payload:
5163 feedback_fn("ssh/hostname verification failed"
5164 " (checking from %s): %s" %
5165 (verifier, nl_payload[failed]))
5166 raise errors.OpExecError("ssh/hostname verification failed")
5168 if self.op.readd:
5169 _RedistributeAncillaryFiles(self)
5170 self.context.ReaddNode(new_node)
5171 # make sure we redistribute the config
5172 self.cfg.Update(new_node, feedback_fn)
5173 # and make sure the new node will not have old files around
5174 if not new_node.master_candidate:
5175 result = self.rpc.call_node_demote_from_mc(new_node.name)
5176 msg = result.fail_msg
5178 self.LogWarning("Node failed to demote itself from master"
5179 " candidate status: %s" % msg)
5181 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5182 additional_vm=self.op.vm_capable)
5183 self.context.AddNode(new_node, self.proc.GetECId())
5186 class LUNodeSetParams(LogicalUnit):
5187 """Modifies the parameters of a node.
5189 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5190 to the node role (as _ROLE_*)
5191 @cvar _R2F: a dictionary from node role to tuples of flags
5192 @cvar _FLAGS: a list of attribute names corresponding to the flags
5195 HPATH = "node-modify"
5196 HTYPE = constants.HTYPE_NODE
5198 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5200 (True, False, False): _ROLE_CANDIDATE,
5201 (False, True, False): _ROLE_DRAINED,
5202 (False, False, True): _ROLE_OFFLINE,
5203 (False, False, False): _ROLE_REGULAR,
5205 _R2F = dict((v, k) for k, v in _F2R.items())
5206 _FLAGS = ["master_candidate", "drained", "offline"]
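# Example: flags (master_candidate=True, drained=False, offline=False) map to
# _ROLE_CANDIDATE through _F2R, and _R2F[_ROLE_CANDIDATE] yields the same
# flag tuple back, which Exec() unpacks into the node object when the role
# changes.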
5208 def CheckArguments(self):
5209 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5210 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5211 self.op.master_capable, self.op.vm_capable,
5212 self.op.secondary_ip, self.op.ndparams]
5213 if all_mods.count(None) == len(all_mods):
5214 raise errors.OpPrereqError("Please pass at least one modification",
5216 if all_mods.count(True) > 1:
5217 raise errors.OpPrereqError("Can't set the node into more than one"
5218 " state at the same time",
5221 # Boolean value that tells us whether we might be demoting from MC
5222 self.might_demote = (self.op.master_candidate == False or
5223 self.op.offline == True or
5224 self.op.drained == True or
5225 self.op.master_capable == False)
5227 if self.op.secondary_ip:
5228 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5229 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5230 " address" % self.op.secondary_ip,
5233 self.lock_all = self.op.auto_promote and self.might_demote
5234 self.lock_instances = self.op.secondary_ip is not None
5236 def ExpandNames(self):
5238 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5240 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5242 if self.lock_instances:
5243 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5245 def DeclareLocks(self, level):
5246 # If we have locked all instances, before waiting to lock nodes, release
5247 # all the ones living on nodes unrelated to the current operation.
5248 if level == locking.LEVEL_NODE and self.lock_instances:
5249 self.affected_instances = []
5250 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5253 # Build list of instances to release
5254 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5255 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5256 if (instance.disk_template in constants.DTS_INT_MIRROR and
5257 self.op.node_name in instance.all_nodes):
5258 instances_keep.append(instance_name)
5259 self.affected_instances.append(instance)
5261 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5263 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5264 set(instances_keep))
5266 def BuildHooksEnv(self):
5269 This runs on the master node.
5273 "OP_TARGET": self.op.node_name,
5274 "MASTER_CANDIDATE": str(self.op.master_candidate),
5275 "OFFLINE": str(self.op.offline),
5276 "DRAINED": str(self.op.drained),
5277 "MASTER_CAPABLE": str(self.op.master_capable),
5278 "VM_CAPABLE": str(self.op.vm_capable),
5281 def BuildHooksNodes(self):
5282 """Build hooks nodes.
5285 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5288 def CheckPrereq(self):
5289 """Check prerequisites.
5291 This only checks the instance list against the existing names.
5294 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5296 if (self.op.master_candidate is not None or
5297 self.op.drained is not None or
5298 self.op.offline is not None):
5299 # we can't change the master's node flags
5300 if self.op.node_name == self.cfg.GetMasterNode():
5301 raise errors.OpPrereqError("The master role can be changed"
5302 " only via master-failover",
5305 if self.op.master_candidate and not node.master_capable:
5306 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5307 " it a master candidate" % node.name,
5310 if self.op.vm_capable == False:
5311 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5313 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5314 " the vm_capable flag" % node.name,
5317 if node.master_candidate and self.might_demote and not self.lock_all:
5318 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5319 # check if after removing the current node, we're missing master
5321 (mc_remaining, mc_should, _) = \
5322 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5323 if mc_remaining < mc_should:
5324 raise errors.OpPrereqError("Not enough master candidates, please"
5325 " pass auto promote option to allow"
5326 " promotion", errors.ECODE_STATE)
5328 self.old_flags = old_flags = (node.master_candidate,
5329 node.drained, node.offline)
5330 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5331 self.old_role = old_role = self._F2R[old_flags]
5333 # Check for ineffective changes
5334 for attr in self._FLAGS:
5335 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5336 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5337 setattr(self.op, attr, None)
5339 # Past this point, any flag change to False means a transition
5340 # away from the respective state, as only real changes are kept
5342 # TODO: We might query the real power state if it supports OOB
5343 if _SupportsOob(self.cfg, node):
5344 if self.op.offline is False and not (node.powered or
5345 self.op.powered == True):
5346 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5347 " offline status can be reset") %
5349 elif self.op.powered is not None:
5350 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5351 " as it does not support out-of-band"
5352 " handling") % self.op.node_name)
5354 # If we're being de-offlined/drained, we'll MC ourselves if needed
5355 if (self.op.drained == False or self.op.offline == False or
5356 (self.op.master_capable and not node.master_capable)):
5357 if _DecideSelfPromotion(self):
5358 self.op.master_candidate = True
5359 self.LogInfo("Auto-promoting node to master candidate")
5361 # If we're no longer master capable, we'll demote ourselves from MC
5362 if self.op.master_capable == False and node.master_candidate:
5363 self.LogInfo("Demoting from master candidate")
5364 self.op.master_candidate = False
5367 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5368 if self.op.master_candidate:
5369 new_role = self._ROLE_CANDIDATE
5370 elif self.op.drained:
5371 new_role = self._ROLE_DRAINED
5372 elif self.op.offline:
5373 new_role = self._ROLE_OFFLINE
5374 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5375 # False is still in new flags, which means we're un-setting (the
5377 new_role = self._ROLE_REGULAR
5378 else: # no new flags, nothing, keep old role
5381 self.new_role = new_role
5383 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5384 # Trying to transition out of offline status
5385 # TODO: Use standard RPC runner, but make sure it works when the node is
5386 # still marked offline
5387 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5389 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5390 " to report its version: %s" %
5391 (node.name, result.fail_msg),
5394 self.LogWarning("Transitioning node from offline to online state"
5395 " without using re-add. Please make sure the node"
5398 if self.op.secondary_ip:
5399 # Ok even without locking, because this can't be changed by any LU
5400 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5401 master_singlehomed = master.secondary_ip == master.primary_ip
5402 if master_singlehomed and self.op.secondary_ip:
5403 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5404 " homed cluster", errors.ECODE_INVAL)
5407 if self.affected_instances:
5408 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5409 " node has instances (%s) configured"
5410 " to use it" % self.affected_instances)
5412 # On online nodes, check that no instances are running, and that
5413 # the node has the new ip and we can reach it.
5414 for instance in self.affected_instances:
5415 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5417 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5418 if master.name != node.name:
5419 # check reachability from master secondary ip to new secondary ip
5420 if not netutils.TcpPing(self.op.secondary_ip,
5421 constants.DEFAULT_NODED_PORT,
5422 source=master.secondary_ip):
5423 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5424 " based ping to node daemon port",
5425 errors.ECODE_ENVIRON)
5427 if self.op.ndparams:
5428 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5429 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5430 self.new_ndparams = new_ndparams
5432 def Exec(self, feedback_fn):
5437 old_role = self.old_role
5438 new_role = self.new_role
5442 if self.op.ndparams:
5443 node.ndparams = self.new_ndparams
5445 if self.op.powered is not None:
5446 node.powered = self.op.powered
5448 for attr in ["master_capable", "vm_capable"]:
5449 val = getattr(self.op, attr)
5451 setattr(node, attr, val)
5452 result.append((attr, str(val)))
5454 if new_role != old_role:
5455 # Tell the node to demote itself, if no longer MC and not offline
5456 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5457 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5459 self.LogWarning("Node failed to demote itself: %s", msg)
5461 new_flags = self._R2F[new_role]
5462 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5464 result.append((desc, str(nf)))
5465 (node.master_candidate, node.drained, node.offline) = new_flags
5467 # we locked all nodes, we adjust the CP before updating this node
5469 _AdjustCandidatePool(self, [node.name])
5471 if self.op.secondary_ip:
5472 node.secondary_ip = self.op.secondary_ip
5473 result.append(("secondary_ip", self.op.secondary_ip))
5475 # this will trigger configuration file update, if needed
5476 self.cfg.Update(node, feedback_fn)
5478 # this will trigger job queue propagation or cleanup if the mc
5479 # flag changed
5480 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5481 self.context.ReaddNode(node)
5486 class LUNodePowercycle(NoHooksLU):
5487 """Powercycles a node.
5492 def CheckArguments(self):
5493 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5494 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5495 raise errors.OpPrereqError("The node is the master and the force"
5496 " parameter was not set",
5499 def ExpandNames(self):
5500 """Locking for PowercycleNode.
5502 This is a last-resort option and shouldn't block on other
5503 jobs. Therefore, we grab no locks.
5506 self.needed_locks = {}
5508 def Exec(self, feedback_fn):
5512 result = self.rpc.call_node_powercycle(self.op.node_name,
5513 self.cfg.GetHypervisorType())
5514 result.Raise("Failed to schedule the reboot")
5515 return result.payload
5518 class LUClusterQuery(NoHooksLU):
5519 """Query cluster configuration.
5524 def ExpandNames(self):
5525 self.needed_locks = {}
5527 def Exec(self, feedback_fn):
5528 """Return cluster config.
5531 cluster = self.cfg.GetClusterInfo()
5534 # Filter just for enabled hypervisors
5535 for os_name, hv_dict in cluster.os_hvp.items():
5536 os_hvp[os_name] = {}
5537 for hv_name, hv_params in hv_dict.items():
5538 if hv_name in cluster.enabled_hypervisors:
5539 os_hvp[os_name][hv_name] = hv_params
5541 # Convert ip_family to ip_version
5542 primary_ip_version = constants.IP4_VERSION
5543 if cluster.primary_ip_family == netutils.IP6Address.family:
5544 primary_ip_version = constants.IP6_VERSION
5547 "software_version": constants.RELEASE_VERSION,
5548 "protocol_version": constants.PROTOCOL_VERSION,
5549 "config_version": constants.CONFIG_VERSION,
5550 "os_api_version": max(constants.OS_API_VERSIONS),
5551 "export_version": constants.EXPORT_VERSION,
5552 "architecture": (platform.architecture()[0], platform.machine()),
5553 "name": cluster.cluster_name,
5554 "master": cluster.master_node,
5555 "default_hypervisor": cluster.enabled_hypervisors[0],
5556 "enabled_hypervisors": cluster.enabled_hypervisors,
5557 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5558 for hypervisor_name in cluster.enabled_hypervisors]),
5560 "beparams": cluster.beparams,
5561 "osparams": cluster.osparams,
5562 "nicparams": cluster.nicparams,
5563 "ndparams": cluster.ndparams,
5564 "candidate_pool_size": cluster.candidate_pool_size,
5565 "master_netdev": cluster.master_netdev,
5566 "master_netmask": cluster.master_netmask,
5567 "volume_group_name": cluster.volume_group_name,
5568 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5569 "file_storage_dir": cluster.file_storage_dir,
5570 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5571 "maintain_node_health": cluster.maintain_node_health,
5572 "ctime": cluster.ctime,
5573 "mtime": cluster.mtime,
5574 "uuid": cluster.uuid,
5575 "tags": list(cluster.GetTags()),
5576 "uid_pool": cluster.uid_pool,
5577 "default_iallocator": cluster.default_iallocator,
5578 "reserved_lvs": cluster.reserved_lvs,
5579 "primary_ip_version": primary_ip_version,
5580 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5581 "hidden_os": cluster.hidden_os,
5582 "blacklisted_os": cluster.blacklisted_os,
5588 class LUClusterConfigQuery(NoHooksLU):
5589 """Return configuration values.
5593 _FIELDS_DYNAMIC = utils.FieldSet()
5594 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5595 "watcher_pause", "volume_group_name")
5597 def CheckArguments(self):
5598 _CheckOutputFields(static=self._FIELDS_STATIC,
5599 dynamic=self._FIELDS_DYNAMIC,
5600 selected=self.op.output_fields)
5602 def ExpandNames(self):
5603 self.needed_locks = {}
5605 def Exec(self, feedback_fn):
5606 """Dump a representation of the cluster config to the standard output.
5610 for field in self.op.output_fields:
5611 if field == "cluster_name":
5612 entry = self.cfg.GetClusterName()
5613 elif field == "master_node":
5614 entry = self.cfg.GetMasterNode()
5615 elif field == "drain_flag":
5616 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5617 elif field == "watcher_pause":
5618 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5619 elif field == "volume_group_name":
5620 entry = self.cfg.GetVGName()
5622 raise errors.ParameterError(field)
5623 values.append(entry)
5627 class LUInstanceActivateDisks(NoHooksLU):
5628 """Bring up an instance's disks.
5633 def ExpandNames(self):
5634 self._ExpandAndLockInstance()
5635 self.needed_locks[locking.LEVEL_NODE] = []
5636 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5638 def DeclareLocks(self, level):
5639 if level == locking.LEVEL_NODE:
5640 self._LockInstancesNodes()
5642 def CheckPrereq(self):
5643 """Check prerequisites.
5645 This checks that the instance is in the cluster.
5648 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5649 assert self.instance is not None, \
5650 "Cannot retrieve locked instance %s" % self.op.instance_name
5651 _CheckNodeOnline(self, self.instance.primary_node)
5653 def Exec(self, feedback_fn):
5654 """Activate the disks.
5657 disks_ok, disks_info = \
5658 _AssembleInstanceDisks(self, self.instance,
5659 ignore_size=self.op.ignore_size)
5661 raise errors.OpExecError("Cannot activate block devices")
5666 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5668 """Prepare the block devices for an instance.
5670 This sets up the block devices on all nodes.
5672 @type lu: L{LogicalUnit}
5673 @param lu: the logical unit on whose behalf we execute
5674 @type instance: L{objects.Instance}
5675 @param instance: the instance for whose disks we assemble
5676 @type disks: list of L{objects.Disk} or None
5677 @param disks: which disks to assemble (or all, if None)
5678 @type ignore_secondaries: boolean
5679 @param ignore_secondaries: if true, errors on secondary nodes
5680 won't result in an error return from the function
5681 @type ignore_size: boolean
5682 @param ignore_size: if true, the current known size of the disk
5683 will not be used during the disk activation, useful for cases
5684 when the size is wrong
5685 @return: False if the operation failed, otherwise a list of
5686 (host, instance_visible_name, node_visible_name)
5687 with the mapping from node devices to instance devices
5692 iname = instance.name
5693 disks = _ExpandCheckDisks(instance, disks)
5695   # With the two-pass mechanism we try to reduce the window of
5696   # opportunity for the race condition of switching DRBD to primary
5697   # before the handshake has occurred, but we do not eliminate it
5699 # The proper fix would be to wait (with some limits) until the
5700 # connection has been made and drbd transitions from WFConnection
5701 # into any other network-connected state (Connected, SyncTarget,
5704 # 1st pass, assemble on all nodes in secondary mode
5705 for idx, inst_disk in enumerate(disks):
5706 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5708 node_disk = node_disk.Copy()
5709 node_disk.UnsetSize()
5710 lu.cfg.SetDiskID(node_disk, node)
5711 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5712 msg = result.fail_msg
5714 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5715 " (is_primary=False, pass=1): %s",
5716 inst_disk.iv_name, node, msg)
5717 if not ignore_secondaries:
5720 # FIXME: race condition on drbd migration to primary
5722 # 2nd pass, do only the primary node
5723 for idx, inst_disk in enumerate(disks):
5726 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5727 if node != instance.primary_node:
5730 node_disk = node_disk.Copy()
5731 node_disk.UnsetSize()
5732 lu.cfg.SetDiskID(node_disk, node)
5733 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5734 msg = result.fail_msg
5736 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5737 " (is_primary=True, pass=2): %s",
5738 inst_disk.iv_name, node, msg)
5741 dev_path = result.payload
5743 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5745 # leave the disks configured for the primary node
5746   # this is a workaround that would be better fixed by
5747 # improving the logical/physical id handling
5749 lu.cfg.SetDiskID(disk, instance.primary_node)
5751 return disks_ok, device_info
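# Editor's note: the two-pass ordering above can be hard to follow amid the
# RPC plumbing, so here is a minimal, self-contained sketch of the same idea
# (assemble every node in secondary mode first, then only the primary node
# in primary mode).  The assemble_fn callback is a stand-in for
# call_blockdev_assemble and is an assumption of this sketch, not Ganeti API.
def _TwoPassAssemblySketch(primary_node, disk_nodes, assemble_fn):
  """Illustrative only: secondaries first, then the primary.

  @param primary_node: name of the instance's primary node
  @param disk_nodes: iterable of node names holding the disk
  @param assemble_fn: callable(node, is_primary) doing the actual work
  @return: list of (node, is_primary) tuples in call order

  """
  calls = []
  # 1st pass: all nodes, including the primary, in secondary mode
  for node in disk_nodes:
    assemble_fn(node, False)
    calls.append((node, False))
  # 2nd pass: only the primary node, now promoted to primary mode
  for node in disk_nodes:
    if node != primary_node:
      continue
    assemble_fn(node, True)
    calls.append((node, True))
  return calls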
5754 def _StartInstanceDisks(lu, instance, force):
5755 """Start the disks of an instance.
5758 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5759 ignore_secondaries=force)
5761 _ShutdownInstanceDisks(lu, instance)
5762 if force is not None and not force:
5763 lu.proc.LogWarning("", hint="If the message above refers to a"
5765 " you can retry the operation using '--force'.")
5766 raise errors.OpExecError("Disk consistency error")
5769 class LUInstanceDeactivateDisks(NoHooksLU):
5770 """Shutdown an instance's disks.
5775 def ExpandNames(self):
5776 self._ExpandAndLockInstance()
5777 self.needed_locks[locking.LEVEL_NODE] = []
5778 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5780 def DeclareLocks(self, level):
5781 if level == locking.LEVEL_NODE:
5782 self._LockInstancesNodes()
5784 def CheckPrereq(self):
5785 """Check prerequisites.
5787 This checks that the instance is in the cluster.
5790 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5791 assert self.instance is not None, \
5792 "Cannot retrieve locked instance %s" % self.op.instance_name
5794 def Exec(self, feedback_fn):
5795 """Deactivate the disks
5798 instance = self.instance
5800 _ShutdownInstanceDisks(self, instance)
5802 _SafeShutdownInstanceDisks(self, instance)
5805 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5806 """Shutdown block devices of an instance.
5808 This function checks if an instance is running, before calling
5809 _ShutdownInstanceDisks.
5812 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5813 _ShutdownInstanceDisks(lu, instance, disks=disks)
5816 def _ExpandCheckDisks(instance, disks):
5817 """Return the instance disks selected by the disks list
5819 @type disks: list of L{objects.Disk} or None
5820 @param disks: selected disks
5821 @rtype: list of L{objects.Disk}
5822 @return: selected instance disks to act on
5826 return instance.disks
5828 if not set(disks).issubset(instance.disks):
5829 raise errors.ProgrammerError("Can only act on disks belonging to the"
5834 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5835 """Shutdown block devices of an instance.
5837 This does the shutdown on all nodes of the instance.
5839   If ignore_primary is false, errors on the primary node are ignored.
5844 disks = _ExpandCheckDisks(instance, disks)
5847 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5848 lu.cfg.SetDiskID(top_disk, node)
5849 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5850 msg = result.fail_msg
5852 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5853 disk.iv_name, node, msg)
5854 if ((node == instance.primary_node and not ignore_primary) or
5855 (node != instance.primary_node and not result.offline)):
5860 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5861 """Checks if a node has enough free memory.
5863   This function checks whether a given node has the needed amount of free
5864   memory. If the node has less memory, or we cannot get the
5865   information from the node, this function raises an OpPrereqError
5868 @type lu: C{LogicalUnit}
5869 @param lu: a logical unit from which we get configuration data
5871 @param node: the node to check
5872 @type reason: C{str}
5873 @param reason: string to use in the error message
5874 @type requested: C{int}
5875 @param requested: the amount of memory in MiB to check for
5876 @type hypervisor_name: C{str}
5877 @param hypervisor_name: the hypervisor to ask for memory stats
5878 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5879 we cannot check the node
5882 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5883 nodeinfo[node].Raise("Can't get data from node %s" % node,
5884 prereq=True, ecode=errors.ECODE_ENVIRON)
5885 free_mem = nodeinfo[node].payload.get("memory_free", None)
5886 if not isinstance(free_mem, int):
5887 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5888 " was '%s'" % (node, free_mem),
5889 errors.ECODE_ENVIRON)
5890 if requested > free_mem:
5891 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5892 " needed %s MiB, available %s MiB" %
5893 (node, reason, requested, free_mem),
5897 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5898   """Checks if nodes have enough free disk space in all the VGs.
5900   This function checks whether all given nodes have the needed amount of
5901   free disk. If any node has less disk, or we cannot get the
5902   information from the node, this function raises an OpPrereqError
5905 @type lu: C{LogicalUnit}
5906 @param lu: a logical unit from which we get configuration data
5907 @type nodenames: C{list}
5908 @param nodenames: the list of node names to check
5909 @type req_sizes: C{dict}
5910 @param req_sizes: the hash of vg and corresponding amount of disk in
5912 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5913 or we cannot check the node
5916 for vg, req_size in req_sizes.items():
5917 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
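# Editor's note: a small, self-contained sketch of the per-VG requirement
# check above, using plain dicts instead of RPC results.  The names and
# shapes (free_by_node_vg in particular) are assumptions of this sketch;
# only the {vg_name: required_MiB} layout of req_sizes mirrors the helper.
def _PerVGShortageSketch(free_by_node_vg, req_sizes):
  """Illustrative only: list (node, vg) pairs lacking the requested space.

  @param free_by_node_vg: dict mapping node name to {vg name: free MiB}
  @param req_sizes: dict mapping vg name to required MiB
  @return: list of (node, vg) tuples that do not satisfy the request

  """
  short = []
  for vg, required in req_sizes.items():
    for node, free_map in free_by_node_vg.items():
      if free_map.get(vg, 0) < required:
        short.append((node, vg))
  return short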
5920 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5921 """Checks if nodes have enough free disk space in the specified VG.
5923   This function checks whether all given nodes have the needed amount of
5924   free disk. If any node has less disk, or we cannot get the
5925   information from the node, this function raises an OpPrereqError
5928 @type lu: C{LogicalUnit}
5929 @param lu: a logical unit from which we get configuration data
5930 @type nodenames: C{list}
5931 @param nodenames: the list of node names to check
5933 @param vg: the volume group to check
5934 @type requested: C{int}
5935 @param requested: the amount of disk in MiB to check for
5936 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5937 or we cannot check the node
5940 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5941 for node in nodenames:
5942 info = nodeinfo[node]
5943 info.Raise("Cannot get current information from node %s" % node,
5944 prereq=True, ecode=errors.ECODE_ENVIRON)
5945 vg_free = info.payload.get("vg_free", None)
5946 if not isinstance(vg_free, int):
5947 raise errors.OpPrereqError("Can't compute free disk space on node"
5948 " %s for vg %s, result was '%s'" %
5949 (node, vg, vg_free), errors.ECODE_ENVIRON)
5950 if requested > vg_free:
5951 raise errors.OpPrereqError("Not enough disk space on target node %s"
5952 " vg %s: required %d MiB, available %d MiB" %
5953 (node, vg, requested, vg_free),
5957 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5958 """Checks if nodes have enough physical CPUs
5960 This function checks if all given nodes have the needed number of
5961   physical CPUs. If any node has fewer CPUs, or we cannot get the
5962 information from the node, this function raises an OpPrereqError
5965 @type lu: C{LogicalUnit}
5966 @param lu: a logical unit from which we get configuration data
5967 @type nodenames: C{list}
5968 @param nodenames: the list of node names to check
5969 @type requested: C{int}
5970 @param requested: the minimum acceptable number of physical CPUs
5971 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5972 or we cannot check the node
5975 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5976 for node in nodenames:
5977 info = nodeinfo[node]
5978 info.Raise("Cannot get current information from node %s" % node,
5979 prereq=True, ecode=errors.ECODE_ENVIRON)
5980 num_cpus = info.payload.get("cpu_total", None)
5981 if not isinstance(num_cpus, int):
5982 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5983 " on node %s, result was '%s'" %
5984 (node, num_cpus), errors.ECODE_ENVIRON)
5985 if requested > num_cpus:
5986 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5987 "required" % (node, num_cpus, requested),
5991 class LUInstanceStartup(LogicalUnit):
5992 """Starts an instance.
5995 HPATH = "instance-start"
5996 HTYPE = constants.HTYPE_INSTANCE
5999 def CheckArguments(self):
6001 if self.op.beparams:
6002 # fill the beparams dict
6003 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6005 def ExpandNames(self):
6006 self._ExpandAndLockInstance()
6008 def BuildHooksEnv(self):
6011 This runs on master, primary and secondary nodes of the instance.
6015 "FORCE": self.op.force,
6018 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6022 def BuildHooksNodes(self):
6023 """Build hooks nodes.
6026 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6029 def CheckPrereq(self):
6030 """Check prerequisites.
6032 This checks that the instance is in the cluster.
6035 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6036 assert self.instance is not None, \
6037 "Cannot retrieve locked instance %s" % self.op.instance_name
6040 if self.op.hvparams:
6041 # check hypervisor parameter syntax (locally)
6042 cluster = self.cfg.GetClusterInfo()
6043 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6044 filled_hvp = cluster.FillHV(instance)
6045 filled_hvp.update(self.op.hvparams)
6046 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6047 hv_type.CheckParameterSyntax(filled_hvp)
6048 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6050 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6052 if self.primary_offline and self.op.ignore_offline_nodes:
6053 self.proc.LogWarning("Ignoring offline primary node")
6055 if self.op.hvparams or self.op.beparams:
6056 self.proc.LogWarning("Overridden parameters are ignored")
6058 _CheckNodeOnline(self, instance.primary_node)
6060 bep = self.cfg.GetClusterInfo().FillBE(instance)
6062 # check bridges existence
6063 _CheckInstanceBridgesExist(self, instance)
6065 remote_info = self.rpc.call_instance_info(instance.primary_node,
6067 instance.hypervisor)
6068 remote_info.Raise("Error checking node %s" % instance.primary_node,
6069 prereq=True, ecode=errors.ECODE_ENVIRON)
6070 if not remote_info.payload: # not running already
6071 _CheckNodeFreeMemory(self, instance.primary_node,
6072 "starting instance %s" % instance.name,
6073 bep[constants.BE_MEMORY], instance.hypervisor)
6075 def Exec(self, feedback_fn):
6076 """Start the instance.
6079 instance = self.instance
6080 force = self.op.force
6082 if not self.op.no_remember:
6083 self.cfg.MarkInstanceUp(instance.name)
6085 if self.primary_offline:
6086 assert self.op.ignore_offline_nodes
6087 self.proc.LogInfo("Primary node offline, marked instance as started")
6089 node_current = instance.primary_node
6091 _StartInstanceDisks(self, instance, force)
6094 self.rpc.call_instance_start(node_current,
6095 (instance, self.op.hvparams,
6097 self.op.startup_paused)
6098 msg = result.fail_msg
6100 _ShutdownInstanceDisks(self, instance)
6101 raise errors.OpExecError("Could not start instance: %s" % msg)
6104 class LUInstanceReboot(LogicalUnit):
6105 """Reboot an instance.
6108 HPATH = "instance-reboot"
6109 HTYPE = constants.HTYPE_INSTANCE
6112 def ExpandNames(self):
6113 self._ExpandAndLockInstance()
6115 def BuildHooksEnv(self):
6118 This runs on master, primary and secondary nodes of the instance.
6122 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6123 "REBOOT_TYPE": self.op.reboot_type,
6124 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6127 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6131 def BuildHooksNodes(self):
6132 """Build hooks nodes.
6135 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6138 def CheckPrereq(self):
6139 """Check prerequisites.
6141 This checks that the instance is in the cluster.
6144 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6145 assert self.instance is not None, \
6146 "Cannot retrieve locked instance %s" % self.op.instance_name
6148 _CheckNodeOnline(self, instance.primary_node)
6150 # check bridges existence
6151 _CheckInstanceBridgesExist(self, instance)
6153 def Exec(self, feedback_fn):
6154 """Reboot the instance.
6157 instance = self.instance
6158 ignore_secondaries = self.op.ignore_secondaries
6159 reboot_type = self.op.reboot_type
6161 remote_info = self.rpc.call_instance_info(instance.primary_node,
6163 instance.hypervisor)
6164 remote_info.Raise("Error checking node %s" % instance.primary_node)
6165 instance_running = bool(remote_info.payload)
6167 node_current = instance.primary_node
6169 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6170 constants.INSTANCE_REBOOT_HARD]:
6171 for disk in instance.disks:
6172 self.cfg.SetDiskID(disk, node_current)
6173 result = self.rpc.call_instance_reboot(node_current, instance,
6175 self.op.shutdown_timeout)
6176 result.Raise("Could not reboot instance")
6178 if instance_running:
6179 result = self.rpc.call_instance_shutdown(node_current, instance,
6180 self.op.shutdown_timeout)
6181 result.Raise("Could not shutdown instance for full reboot")
6182 _ShutdownInstanceDisks(self, instance)
6184 self.LogInfo("Instance %s was already stopped, starting now",
6186 _StartInstanceDisks(self, instance, ignore_secondaries)
6187 result = self.rpc.call_instance_start(node_current,
6188 (instance, None, None), False)
6189 msg = result.fail_msg
6191 _ShutdownInstanceDisks(self, instance)
6192 raise errors.OpExecError("Could not start instance for"
6193 " full reboot: %s" % msg)
6195 self.cfg.MarkInstanceUp(instance.name)
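# Editor's note: a compact sketch of the reboot dispatch implemented in
# LUInstanceReboot.Exec above.  The strings "soft", "hard" and "full" stand
# in for the INSTANCE_REBOOT_* constants and the step names are made up;
# the sketch only illustrates the branching and is not part of Ganeti.
def _RebootDispatchSketch(reboot_type, instance_running):
  """Illustrative only: which steps a given reboot type maps to.

  @param reboot_type: one of "soft", "hard", "full" (stand-in values)
  @param instance_running: whether the instance is currently running
  @return: ordered list of step names

  """
  if instance_running and reboot_type in ("soft", "hard"):
    # soft/hard reboots of a running instance go through the hypervisor
    return ["reboot-in-place"]
  steps = []
  if instance_running:
    # full reboot: stop the instance and its disks first
    steps.extend(["shutdown-instance", "shutdown-disks"])
  # in all remaining cases the instance is (re)started from cold
  steps.extend(["start-disks", "start-instance"])
  return steps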
6198 class LUInstanceShutdown(LogicalUnit):
6199 """Shutdown an instance.
6202 HPATH = "instance-stop"
6203 HTYPE = constants.HTYPE_INSTANCE
6206 def ExpandNames(self):
6207 self._ExpandAndLockInstance()
6209 def BuildHooksEnv(self):
6212 This runs on master, primary and secondary nodes of the instance.
6215 env = _BuildInstanceHookEnvByObject(self, self.instance)
6216 env["TIMEOUT"] = self.op.timeout
6219 def BuildHooksNodes(self):
6220 """Build hooks nodes.
6223 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6226 def CheckPrereq(self):
6227 """Check prerequisites.
6229 This checks that the instance is in the cluster.
6232 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6233 assert self.instance is not None, \
6234 "Cannot retrieve locked instance %s" % self.op.instance_name
6236 self.primary_offline = \
6237 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6239 if self.primary_offline and self.op.ignore_offline_nodes:
6240 self.proc.LogWarning("Ignoring offline primary node")
6242 _CheckNodeOnline(self, self.instance.primary_node)
6244 def Exec(self, feedback_fn):
6245 """Shutdown the instance.
6248 instance = self.instance
6249 node_current = instance.primary_node
6250 timeout = self.op.timeout
6252 if not self.op.no_remember:
6253 self.cfg.MarkInstanceDown(instance.name)
6255 if self.primary_offline:
6256 assert self.op.ignore_offline_nodes
6257 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6259 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6260 msg = result.fail_msg
6262 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6264 _ShutdownInstanceDisks(self, instance)
6267 class LUInstanceReinstall(LogicalUnit):
6268 """Reinstall an instance.
6271 HPATH = "instance-reinstall"
6272 HTYPE = constants.HTYPE_INSTANCE
6275 def ExpandNames(self):
6276 self._ExpandAndLockInstance()
6278 def BuildHooksEnv(self):
6281 This runs on master, primary and secondary nodes of the instance.
6284 return _BuildInstanceHookEnvByObject(self, self.instance)
6286 def BuildHooksNodes(self):
6287 """Build hooks nodes.
6290 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6293 def CheckPrereq(self):
6294 """Check prerequisites.
6296 This checks that the instance is in the cluster and is not running.
6299 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6300 assert instance is not None, \
6301 "Cannot retrieve locked instance %s" % self.op.instance_name
6302 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6303 " offline, cannot reinstall")
6304 for node in instance.secondary_nodes:
6305 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6306 " cannot reinstall")
6308 if instance.disk_template == constants.DT_DISKLESS:
6309 raise errors.OpPrereqError("Instance '%s' has no disks" %
6310 self.op.instance_name,
6312 _CheckInstanceDown(self, instance, "cannot reinstall")
6314 if self.op.os_type is not None:
6316 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6317 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6318 instance_os = self.op.os_type
6320 instance_os = instance.os
6322 nodelist = list(instance.all_nodes)
6324 if self.op.osparams:
6325 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6326 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6327 self.os_inst = i_osdict # the new dict (without defaults)
6331 self.instance = instance
6333 def Exec(self, feedback_fn):
6334 """Reinstall the instance.
6337 inst = self.instance
6339 if self.op.os_type is not None:
6340 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6341 inst.os = self.op.os_type
6342 # Write to configuration
6343 self.cfg.Update(inst, feedback_fn)
6345 _StartInstanceDisks(self, inst, None)
6347 feedback_fn("Running the instance OS create scripts...")
6348 # FIXME: pass debug option from opcode to backend
6349 result = self.rpc.call_instance_os_add(inst.primary_node,
6350 (inst, self.os_inst), True,
6351 self.op.debug_level)
6352 result.Raise("Could not install OS for instance %s on node %s" %
6353 (inst.name, inst.primary_node))
6355 _ShutdownInstanceDisks(self, inst)
6358 class LUInstanceRecreateDisks(LogicalUnit):
6359 """Recreate an instance's missing disks.
6362 HPATH = "instance-recreate-disks"
6363 HTYPE = constants.HTYPE_INSTANCE
6366 def CheckArguments(self):
6367 # normalise the disk list
6368 self.op.disks = sorted(frozenset(self.op.disks))
6370 def ExpandNames(self):
6371 self._ExpandAndLockInstance()
6372 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6374 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6375 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6377 self.needed_locks[locking.LEVEL_NODE] = []
6379 def DeclareLocks(self, level):
6380 if level == locking.LEVEL_NODE:
6381 # if we replace the nodes, we only need to lock the old primary,
6382 # otherwise we need to lock all nodes for disk re-creation
6383 primary_only = bool(self.op.nodes)
6384 self._LockInstancesNodes(primary_only=primary_only)
6386 def BuildHooksEnv(self):
6389 This runs on master, primary and secondary nodes of the instance.
6392 return _BuildInstanceHookEnvByObject(self, self.instance)
6394 def BuildHooksNodes(self):
6395 """Build hooks nodes.
6398 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6401 def CheckPrereq(self):
6402 """Check prerequisites.
6404 This checks that the instance is in the cluster and is not running.
6407 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6408 assert instance is not None, \
6409 "Cannot retrieve locked instance %s" % self.op.instance_name
6411 if len(self.op.nodes) != len(instance.all_nodes):
6412 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6413 " %d replacement nodes were specified" %
6414 (instance.name, len(instance.all_nodes),
6415 len(self.op.nodes)),
6417 assert instance.disk_template != constants.DT_DRBD8 or \
6418 len(self.op.nodes) == 2
6419 assert instance.disk_template != constants.DT_PLAIN or \
6420 len(self.op.nodes) == 1
6421 primary_node = self.op.nodes[0]
6423 primary_node = instance.primary_node
6424 _CheckNodeOnline(self, primary_node)
6426 if instance.disk_template == constants.DT_DISKLESS:
6427 raise errors.OpPrereqError("Instance '%s' has no disks" %
6428 self.op.instance_name, errors.ECODE_INVAL)
6429 # if we replace nodes *and* the old primary is offline, we don't
6431 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6432 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6433 if not (self.op.nodes and old_pnode.offline):
6434 _CheckInstanceDown(self, instance, "cannot recreate disks")
6436 if not self.op.disks:
6437 self.op.disks = range(len(instance.disks))
6439 for idx in self.op.disks:
6440 if idx >= len(instance.disks):
6441 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6443 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6444 raise errors.OpPrereqError("Can't recreate disks partially and"
6445 " change the nodes at the same time",
6447 self.instance = instance
6449 def Exec(self, feedback_fn):
6450 """Recreate the disks.
6453 instance = self.instance
6456 mods = [] # keeps track of needed logical_id changes
6458 for idx, disk in enumerate(instance.disks):
6459 if idx not in self.op.disks: # disk idx has not been passed in
6462 # update secondaries for disks, if needed
6464 if disk.dev_type == constants.LD_DRBD8:
6465 # need to update the nodes and minors
6466 assert len(self.op.nodes) == 2
6467 assert len(disk.logical_id) == 6 # otherwise disk internals
6469 (_, _, old_port, _, _, old_secret) = disk.logical_id
6470 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6471 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6472 new_minors[0], new_minors[1], old_secret)
6473 assert len(disk.logical_id) == len(new_id)
6474 mods.append((idx, new_id))
6476 # now that we have passed all asserts above, we can apply the mods
6477 # in a single run (to avoid partial changes)
6478 for idx, new_id in mods:
6479 instance.disks[idx].logical_id = new_id
6481 # change primary node, if needed
6483 instance.primary_node = self.op.nodes[0]
6484 self.LogWarning("Changing the instance's nodes, you will have to"
6485 " remove any disks left on the older nodes manually")
6488 self.cfg.Update(instance, feedback_fn)
6490 _CreateDisks(self, instance, to_skip=to_skip)
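# Editor's note: a minimal sketch of the DRBD8 logical_id remapping done in
# LUInstanceRecreateDisks.Exec above.  The 6-tuple layout
# (node_a, node_b, port, minor_a, minor_b, secret) is taken from the code
# above; the function itself is illustrative and not part of Ganeti.
def _RemapDrbdLogicalIdSketch(logical_id, new_nodes, new_minors):
  """Illustrative only: rebuild a DRBD8 logical_id for a new node pair.

  Port and secret are preserved, while the node names and minors are
  replaced, exactly as the mods list above is built.

  """
  (_, _, old_port, _, _, old_secret) = logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)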
6493 class LUInstanceRename(LogicalUnit):
6494 """Rename an instance.
6497 HPATH = "instance-rename"
6498 HTYPE = constants.HTYPE_INSTANCE
6500 def CheckArguments(self):
6504 if self.op.ip_check and not self.op.name_check:
6505 # TODO: make the ip check more flexible and not depend on the name check
6506 raise errors.OpPrereqError("IP address check requires a name check",
6509 def BuildHooksEnv(self):
6512 This runs on master, primary and secondary nodes of the instance.
6515 env = _BuildInstanceHookEnvByObject(self, self.instance)
6516 env["INSTANCE_NEW_NAME"] = self.op.new_name
6519 def BuildHooksNodes(self):
6520 """Build hooks nodes.
6523 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6526 def CheckPrereq(self):
6527 """Check prerequisites.
6529 This checks that the instance is in the cluster and is not running.
6532 self.op.instance_name = _ExpandInstanceName(self.cfg,
6533 self.op.instance_name)
6534 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6535 assert instance is not None
6536 _CheckNodeOnline(self, instance.primary_node)
6537 _CheckInstanceDown(self, instance, "cannot rename")
6538 self.instance = instance
6540 new_name = self.op.new_name
6541 if self.op.name_check:
6542 hostname = netutils.GetHostname(name=new_name)
6543 if hostname != new_name:
6544 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6546 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6547 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6548 " same as given hostname '%s'") %
6549 (hostname.name, self.op.new_name),
6551 new_name = self.op.new_name = hostname.name
6552 if (self.op.ip_check and
6553 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6554 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6555 (hostname.ip, new_name),
6556 errors.ECODE_NOTUNIQUE)
6558 instance_list = self.cfg.GetInstanceList()
6559 if new_name in instance_list and new_name != instance.name:
6560 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6561 new_name, errors.ECODE_EXISTS)
6563 def Exec(self, feedback_fn):
6564 """Rename the instance.
6567 inst = self.instance
6568 old_name = inst.name
6570 rename_file_storage = False
6571 if (inst.disk_template in constants.DTS_FILEBASED and
6572 self.op.new_name != inst.name):
6573 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6574 rename_file_storage = True
6576 self.cfg.RenameInstance(inst.name, self.op.new_name)
6577 # Change the instance lock. This is definitely safe while we hold the BGL.
6578 # Otherwise the new lock would have to be added in acquired mode.
6580 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6581 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6583 # re-read the instance from the configuration after rename
6584 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6586 if rename_file_storage:
6587 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6588 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6589 old_file_storage_dir,
6590 new_file_storage_dir)
6591 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6592 " (but the instance has been renamed in Ganeti)" %
6593 (inst.primary_node, old_file_storage_dir,
6594 new_file_storage_dir))
6596 _StartInstanceDisks(self, inst, None)
6598 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6599 old_name, self.op.debug_level)
6600 msg = result.fail_msg
6602 msg = ("Could not run OS rename script for instance %s on node %s"
6603 " (but the instance has been renamed in Ganeti): %s" %
6604 (inst.name, inst.primary_node, msg))
6605 self.proc.LogWarning(msg)
6607 _ShutdownInstanceDisks(self, inst)
6612 class LUInstanceRemove(LogicalUnit):
6613 """Remove an instance.
6616 HPATH = "instance-remove"
6617 HTYPE = constants.HTYPE_INSTANCE
6620 def ExpandNames(self):
6621 self._ExpandAndLockInstance()
6622 self.needed_locks[locking.LEVEL_NODE] = []
6623 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6625 def DeclareLocks(self, level):
6626 if level == locking.LEVEL_NODE:
6627 self._LockInstancesNodes()
6629 def BuildHooksEnv(self):
6632 This runs on master, primary and secondary nodes of the instance.
6635 env = _BuildInstanceHookEnvByObject(self, self.instance)
6636 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6639 def BuildHooksNodes(self):
6640 """Build hooks nodes.
6643 nl = [self.cfg.GetMasterNode()]
6644 nl_post = list(self.instance.all_nodes) + nl
6645 return (nl, nl_post)
6647 def CheckPrereq(self):
6648 """Check prerequisites.
6650 This checks that the instance is in the cluster.
6653 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6654 assert self.instance is not None, \
6655 "Cannot retrieve locked instance %s" % self.op.instance_name
6657 def Exec(self, feedback_fn):
6658 """Remove the instance.
6661 instance = self.instance
6662 logging.info("Shutting down instance %s on node %s",
6663 instance.name, instance.primary_node)
6665 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6666 self.op.shutdown_timeout)
6667 msg = result.fail_msg
6669 if self.op.ignore_failures:
6670 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6672 raise errors.OpExecError("Could not shutdown instance %s on"
6674 (instance.name, instance.primary_node, msg))
6676 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6679 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6680 """Utility function to remove an instance.
6683 logging.info("Removing block devices for instance %s", instance.name)
6685 if not _RemoveDisks(lu, instance):
6686 if not ignore_failures:
6687 raise errors.OpExecError("Can't remove instance's disks")
6688 feedback_fn("Warning: can't remove instance's disks")
6690 logging.info("Removing instance %s out of cluster config", instance.name)
6692 lu.cfg.RemoveInstance(instance.name)
6694 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6695 "Instance lock removal conflict"
6697 # Remove lock for the instance
6698 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6701 class LUInstanceQuery(NoHooksLU):
6702 """Logical unit for querying instances.
6705 # pylint: disable=W0142
6708 def CheckArguments(self):
6709 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6710 self.op.output_fields, self.op.use_locking)
6712 def ExpandNames(self):
6713 self.iq.ExpandNames(self)
6715 def DeclareLocks(self, level):
6716 self.iq.DeclareLocks(self, level)
6718 def Exec(self, feedback_fn):
6719 return self.iq.OldStyleQuery(self)
6722 class LUInstanceFailover(LogicalUnit):
6723 """Failover an instance.
6726 HPATH = "instance-failover"
6727 HTYPE = constants.HTYPE_INSTANCE
6730 def CheckArguments(self):
6731 """Check the arguments.
6734 self.iallocator = getattr(self.op, "iallocator", None)
6735 self.target_node = getattr(self.op, "target_node", None)
6737 def ExpandNames(self):
6738 self._ExpandAndLockInstance()
6740 if self.op.target_node is not None:
6741 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6743 self.needed_locks[locking.LEVEL_NODE] = []
6744 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6746 ignore_consistency = self.op.ignore_consistency
6747 shutdown_timeout = self.op.shutdown_timeout
6748 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6751 ignore_consistency=ignore_consistency,
6752 shutdown_timeout=shutdown_timeout)
6753 self.tasklets = [self._migrater]
6755 def DeclareLocks(self, level):
6756 if level == locking.LEVEL_NODE:
6757 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6758 if instance.disk_template in constants.DTS_EXT_MIRROR:
6759 if self.op.target_node is None:
6760 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6762 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6763 self.op.target_node]
6764 del self.recalculate_locks[locking.LEVEL_NODE]
6766 self._LockInstancesNodes()
6768 def BuildHooksEnv(self):
6771 This runs on master, primary and secondary nodes of the instance.
6774 instance = self._migrater.instance
6775 source_node = instance.primary_node
6776 target_node = self.op.target_node
6778 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6779 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6780 "OLD_PRIMARY": source_node,
6781 "NEW_PRIMARY": target_node,
6784 if instance.disk_template in constants.DTS_INT_MIRROR:
6785 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6786 env["NEW_SECONDARY"] = source_node
6788 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6790 env.update(_BuildInstanceHookEnvByObject(self, instance))
6794 def BuildHooksNodes(self):
6795 """Build hooks nodes.
6798 instance = self._migrater.instance
6799 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6800 return (nl, nl + [instance.primary_node])
6803 class LUInstanceMigrate(LogicalUnit):
6804 """Migrate an instance.
6806 This is migration without shutting down, compared to the failover,
6807 which is done with shutdown.
6810 HPATH = "instance-migrate"
6811 HTYPE = constants.HTYPE_INSTANCE
6814 def ExpandNames(self):
6815 self._ExpandAndLockInstance()
6817 if self.op.target_node is not None:
6818 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6820 self.needed_locks[locking.LEVEL_NODE] = []
6821 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6823 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6824 cleanup=self.op.cleanup,
6826 fallback=self.op.allow_failover)
6827 self.tasklets = [self._migrater]
6829 def DeclareLocks(self, level):
6830 if level == locking.LEVEL_NODE:
6831 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6832 if instance.disk_template in constants.DTS_EXT_MIRROR:
6833 if self.op.target_node is None:
6834 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6836 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6837 self.op.target_node]
6838 del self.recalculate_locks[locking.LEVEL_NODE]
6840 self._LockInstancesNodes()
6842 def BuildHooksEnv(self):
6845 This runs on master, primary and secondary nodes of the instance.
6848 instance = self._migrater.instance
6849 source_node = instance.primary_node
6850 target_node = self.op.target_node
6851 env = _BuildInstanceHookEnvByObject(self, instance)
6853 "MIGRATE_LIVE": self._migrater.live,
6854 "MIGRATE_CLEANUP": self.op.cleanup,
6855 "OLD_PRIMARY": source_node,
6856 "NEW_PRIMARY": target_node,
6859 if instance.disk_template in constants.DTS_INT_MIRROR:
6860 env["OLD_SECONDARY"] = target_node
6861 env["NEW_SECONDARY"] = source_node
6863 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6867 def BuildHooksNodes(self):
6868 """Build hooks nodes.
6871 instance = self._migrater.instance
6872 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6873 return (nl, nl + [instance.primary_node])
6876 class LUInstanceMove(LogicalUnit):
6877 """Move an instance by data-copying.
6880 HPATH = "instance-move"
6881 HTYPE = constants.HTYPE_INSTANCE
6884 def ExpandNames(self):
6885 self._ExpandAndLockInstance()
6886 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6887 self.op.target_node = target_node
6888 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6889 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6891 def DeclareLocks(self, level):
6892 if level == locking.LEVEL_NODE:
6893 self._LockInstancesNodes(primary_only=True)
6895 def BuildHooksEnv(self):
6898 This runs on master, primary and secondary nodes of the instance.
6902 "TARGET_NODE": self.op.target_node,
6903 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6905 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6908 def BuildHooksNodes(self):
6909 """Build hooks nodes.
6913 self.cfg.GetMasterNode(),
6914 self.instance.primary_node,
6915 self.op.target_node,
6919 def CheckPrereq(self):
6920 """Check prerequisites.
6922 This checks that the instance is in the cluster.
6925 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6926 assert self.instance is not None, \
6927 "Cannot retrieve locked instance %s" % self.op.instance_name
6929 node = self.cfg.GetNodeInfo(self.op.target_node)
6930 assert node is not None, \
6931 "Cannot retrieve locked node %s" % self.op.target_node
6933 self.target_node = target_node = node.name
6935 if target_node == instance.primary_node:
6936 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6937 (instance.name, target_node),
6940 bep = self.cfg.GetClusterInfo().FillBE(instance)
6942 for idx, dsk in enumerate(instance.disks):
6943 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6944 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6945 " cannot copy" % idx, errors.ECODE_STATE)
6947 _CheckNodeOnline(self, target_node)
6948 _CheckNodeNotDrained(self, target_node)
6949 _CheckNodeVmCapable(self, target_node)
6951 if instance.admin_up:
6952 # check memory requirements on the secondary node
6953 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6954 instance.name, bep[constants.BE_MEMORY],
6955 instance.hypervisor)
6957 self.LogInfo("Not checking memory on the secondary node as"
6958 " instance will not be started")
6960     # check bridge existence
6961 _CheckInstanceBridgesExist(self, instance, node=target_node)
6963 def Exec(self, feedback_fn):
6964 """Move an instance.
6966 The move is done by shutting it down on its present node, copying
6967 the data over (slow) and starting it on the new node.
6970 instance = self.instance
6972 source_node = instance.primary_node
6973 target_node = self.target_node
6975 self.LogInfo("Shutting down instance %s on source node %s",
6976 instance.name, source_node)
6978 result = self.rpc.call_instance_shutdown(source_node, instance,
6979 self.op.shutdown_timeout)
6980 msg = result.fail_msg
6982 if self.op.ignore_consistency:
6983 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6984 " Proceeding anyway. Please make sure node"
6985 " %s is down. Error details: %s",
6986 instance.name, source_node, source_node, msg)
6988 raise errors.OpExecError("Could not shutdown instance %s on"
6990 (instance.name, source_node, msg))
6992 # create the target disks
6994 _CreateDisks(self, instance, target_node=target_node)
6995 except errors.OpExecError:
6996 self.LogWarning("Device creation failed, reverting...")
6998 _RemoveDisks(self, instance, target_node=target_node)
7000 self.cfg.ReleaseDRBDMinors(instance.name)
7003 cluster_name = self.cfg.GetClusterInfo().cluster_name
7006 # activate, get path, copy the data over
7007 for idx, disk in enumerate(instance.disks):
7008 self.LogInfo("Copying data for disk %d", idx)
7009 result = self.rpc.call_blockdev_assemble(target_node, disk,
7010 instance.name, True, idx)
7012 self.LogWarning("Can't assemble newly created disk %d: %s",
7013 idx, result.fail_msg)
7014 errs.append(result.fail_msg)
7016 dev_path = result.payload
7017 result = self.rpc.call_blockdev_export(source_node, disk,
7018 target_node, dev_path,
7021 self.LogWarning("Can't copy data over for disk %d: %s",
7022 idx, result.fail_msg)
7023 errs.append(result.fail_msg)
7027 self.LogWarning("Some disks failed to copy, aborting")
7029 _RemoveDisks(self, instance, target_node=target_node)
7031 self.cfg.ReleaseDRBDMinors(instance.name)
7032 raise errors.OpExecError("Errors during disk copy: %s" %
7035 instance.primary_node = target_node
7036 self.cfg.Update(instance, feedback_fn)
7038 self.LogInfo("Removing the disks on the original node")
7039 _RemoveDisks(self, instance, target_node=source_node)
7041 # Only start the instance if it's marked as up
7042 if instance.admin_up:
7043 self.LogInfo("Starting instance %s on node %s",
7044 instance.name, target_node)
7046 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7047 ignore_secondaries=True)
7049 _ShutdownInstanceDisks(self, instance)
7050 raise errors.OpExecError("Can't activate the instance's disks")
7052 result = self.rpc.call_instance_start(target_node,
7053 (instance, None, None), False)
7054 msg = result.fail_msg
7056 _ShutdownInstanceDisks(self, instance)
7057 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7058 (instance.name, target_node, msg))
7061 class LUNodeMigrate(LogicalUnit):
7062 """Migrate all instances from a node.
7065 HPATH = "node-migrate"
7066 HTYPE = constants.HTYPE_NODE
7069 def CheckArguments(self):
7072 def ExpandNames(self):
7073 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7075 self.share_locks = _ShareAll()
7076 self.needed_locks = {
7077 locking.LEVEL_NODE: [self.op.node_name],
7080 def BuildHooksEnv(self):
7083 This runs on the master, the primary and all the secondaries.
7087 "NODE_NAME": self.op.node_name,
7090 def BuildHooksNodes(self):
7091 """Build hooks nodes.
7094 nl = [self.cfg.GetMasterNode()]
7097 def CheckPrereq(self):
7100 def Exec(self, feedback_fn):
7101 # Prepare jobs for migration instances
7103 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7106 iallocator=self.op.iallocator,
7107 target_node=self.op.target_node)]
7108 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7111 # TODO: Run iallocator in this opcode and pass correct placement options to
7112 # OpInstanceMigrate. Since other jobs can modify the cluster between
7113 # running the iallocator and the actual migration, a good consistency model
7114 # will have to be found.
7116 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7117 frozenset([self.op.node_name]))
7119 return ResultWithJobs(jobs)
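# Editor's note: schematically, the result returned above is a list of
# single-opcode jobs, one per primary instance on the evacuated node, e.g.
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2", ...)],
#   ]
#
# i.e. a list of lists of opcodes ("inst1"/"inst2" are made-up names); each
# inner list becomes one job whose ID is reported back to the caller.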
7122 class TLMigrateInstance(Tasklet):
7123 """Tasklet class for instance migration.
7126 @ivar live: whether the migration will be done live or non-live;
7127       this variable is initialized only after CheckPrereq has run
7128 @type cleanup: boolean
7129   @ivar cleanup: Whether we clean up from a failed migration
7130 @type iallocator: string
7131 @ivar iallocator: The iallocator used to determine target_node
7132 @type target_node: string
7133 @ivar target_node: If given, the target_node to reallocate the instance to
7134 @type failover: boolean
7135 @ivar failover: Whether operation results in failover or migration
7136 @type fallback: boolean
7137 @ivar fallback: Whether fallback to failover is allowed if migration not
7139 @type ignore_consistency: boolean
7140   @ivar ignore_consistency: Whether we should ignore consistency between source
7142 @type shutdown_timeout: int
7143 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7148 _MIGRATION_POLL_INTERVAL = 1 # seconds
7149 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7151 def __init__(self, lu, instance_name, cleanup=False,
7152 failover=False, fallback=False,
7153 ignore_consistency=False,
7154 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7155 """Initializes this class.
7158 Tasklet.__init__(self, lu)
7161 self.instance_name = instance_name
7162 self.cleanup = cleanup
7163 self.live = False # will be overridden later
7164 self.failover = failover
7165 self.fallback = fallback
7166 self.ignore_consistency = ignore_consistency
7167 self.shutdown_timeout = shutdown_timeout
7169 def CheckPrereq(self):
7170 """Check prerequisites.
7172 This checks that the instance is in the cluster.
7175 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7176 instance = self.cfg.GetInstanceInfo(instance_name)
7177 assert instance is not None
7178 self.instance = instance
7180 if (not self.cleanup and not instance.admin_up and not self.failover and
7182 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7184 self.failover = True
7186 if instance.disk_template not in constants.DTS_MIRRORED:
7191 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7192 " %s" % (instance.disk_template, text),
7195 if instance.disk_template in constants.DTS_EXT_MIRROR:
7196 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7198 if self.lu.op.iallocator:
7199 self._RunAllocator()
7201         # We set self.target_node as it is required by
7203 self.target_node = self.lu.op.target_node
7205 # self.target_node is already populated, either directly or by the
7207 target_node = self.target_node
7208 if self.target_node == instance.primary_node:
7209 raise errors.OpPrereqError("Cannot migrate instance %s"
7210 " to its primary (%s)" %
7211 (instance.name, instance.primary_node))
7213 if len(self.lu.tasklets) == 1:
7214 # It is safe to release locks only when we're the only tasklet
7216 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7217 keep=[instance.primary_node, self.target_node])
7220 secondary_nodes = instance.secondary_nodes
7221 if not secondary_nodes:
7222 raise errors.ConfigurationError("No secondary node but using"
7223 " %s disk template" %
7224 instance.disk_template)
7225 target_node = secondary_nodes[0]
7226 if self.lu.op.iallocator or (self.lu.op.target_node and
7227 self.lu.op.target_node != target_node):
7229 text = "failed over"
7232 raise errors.OpPrereqError("Instances with disk template %s cannot"
7233 " be %s to arbitrary nodes"
7234 " (neither an iallocator nor a target"
7235 " node can be passed)" %
7236 (instance.disk_template, text),
7239 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7241 # check memory requirements on the secondary node
7242 if not self.failover or instance.admin_up:
7243 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7244 instance.name, i_be[constants.BE_MEMORY],
7245 instance.hypervisor)
7247 self.lu.LogInfo("Not checking memory on the secondary node as"
7248 " instance will not be started")
7250     # check bridge existence
7251 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7253 if not self.cleanup:
7254 _CheckNodeNotDrained(self.lu, target_node)
7255 if not self.failover:
7256 result = self.rpc.call_instance_migratable(instance.primary_node,
7258 if result.fail_msg and self.fallback:
7259 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7261 self.failover = True
7263 result.Raise("Can't migrate, please use failover",
7264 prereq=True, ecode=errors.ECODE_STATE)
7266 assert not (self.failover and self.cleanup)
7268 if not self.failover:
7269 if self.lu.op.live is not None and self.lu.op.mode is not None:
7270 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7271 " parameters are accepted",
7273 if self.lu.op.live is not None:
7275 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7277 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7278 # reset the 'live' parameter to None so that repeated
7279 # invocations of CheckPrereq do not raise an exception
7280 self.lu.op.live = None
7281 elif self.lu.op.mode is None:
7282 # read the default value from the hypervisor
7283 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7285 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7287 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7289 # Failover is never live
7292 def _RunAllocator(self):
7293 """Run the allocator based on input opcode.
7296 ial = IAllocator(self.cfg, self.rpc,
7297 mode=constants.IALLOCATOR_MODE_RELOC,
7298 name=self.instance_name,
7299 # TODO See why hail breaks with a single node below
7300 relocate_from=[self.instance.primary_node,
7301 self.instance.primary_node],
7304 ial.Run(self.lu.op.iallocator)
7307 raise errors.OpPrereqError("Can't compute nodes using"
7308 " iallocator '%s': %s" %
7309 (self.lu.op.iallocator, ial.info),
7311 if len(ial.result) != ial.required_nodes:
7312 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7313 " of nodes (%s), required %s" %
7314 (self.lu.op.iallocator, len(ial.result),
7315 ial.required_nodes), errors.ECODE_FAULT)
7316 self.target_node = ial.result[0]
7317 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7318 self.instance_name, self.lu.op.iallocator,
7319 utils.CommaJoin(ial.result))
7321 def _WaitUntilSync(self):
7322 """Poll with custom rpc for disk sync.
7324 This uses our own step-based rpc call.
7327 self.feedback_fn("* wait until resync is done")
7331 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7333 self.instance.disks)
7335 for node, nres in result.items():
7336 nres.Raise("Cannot resync disks on node %s" % node)
7337 node_done, node_percent = nres.payload
7338 all_done = all_done and node_done
7339 if node_percent is not None:
7340 min_percent = min(min_percent, node_percent)
7342 if min_percent < 100:
7343 self.feedback_fn(" - progress: %.1f%%" % min_percent)
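  # Editor's note: a worked example (not part of Ganeti) of how the loop in
  # _WaitUntilSync combines the per-node payloads: all_done is the AND of
  # every node's "done" flag and min_percent tracks the slowest node, e.g.
  #
  #   payloads = [(True, 100), (False, 73.5)]
  #   all_done = all(done for (done, _) in payloads)               # False
  #   min_pct = min(p for (_, p) in payloads if p is not None)     # 73.5
  #
  # so progress feedback keeps being emitted while the slowest disk is
  # below 100%.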
7346 def _EnsureSecondary(self, node):
7347 """Demote a node to secondary.
7350 self.feedback_fn("* switching node %s to secondary mode" % node)
7352 for dev in self.instance.disks:
7353 self.cfg.SetDiskID(dev, node)
7355 result = self.rpc.call_blockdev_close(node, self.instance.name,
7356 self.instance.disks)
7357 result.Raise("Cannot change disk to secondary on node %s" % node)
7359 def _GoStandalone(self):
7360 """Disconnect from the network.
7363 self.feedback_fn("* changing into standalone mode")
7364 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7365 self.instance.disks)
7366 for node, nres in result.items():
7367 nres.Raise("Cannot disconnect disks node %s" % node)
7369 def _GoReconnect(self, multimaster):
7370 """Reconnect to the network.
7376 msg = "single-master"
7377 self.feedback_fn("* changing disks into %s mode" % msg)
7378 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7379 self.instance.disks,
7380 self.instance.name, multimaster)
7381 for node, nres in result.items():
7382 nres.Raise("Cannot change disks config on node %s" % node)
7384 def _ExecCleanup(self):
7385 """Try to cleanup after a failed migration.
7387 The cleanup is done by:
7388 - check that the instance is running only on one node
7389 (and update the config if needed)
7390 - change disks on its secondary node to secondary
7391 - wait until disks are fully synchronized
7392 - disconnect from the network
7393 - change disks into single-master mode
7394 - wait again until disks are fully synchronized
7397 instance = self.instance
7398 target_node = self.target_node
7399 source_node = self.source_node
7401 # check running on only one node
7402 self.feedback_fn("* checking where the instance actually runs"
7403 " (if this hangs, the hypervisor might be in"
7405 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7406 for node, result in ins_l.items():
7407 result.Raise("Can't contact node %s" % node)
7409 runningon_source = instance.name in ins_l[source_node].payload
7410 runningon_target = instance.name in ins_l[target_node].payload
7412 if runningon_source and runningon_target:
7413 raise errors.OpExecError("Instance seems to be running on two nodes,"
7414 " or the hypervisor is confused; you will have"
7415 " to ensure manually that it runs only on one"
7416 " and restart this operation")
7418 if not (runningon_source or runningon_target):
7419 raise errors.OpExecError("Instance does not seem to be running at all;"
7420 " in this case it's safer to repair by"
7421 " running 'gnt-instance stop' to ensure disk"
7422 " shutdown, and then restarting it")
7424 if runningon_target:
7425 # the migration has actually succeeded, we need to update the config
7426 self.feedback_fn("* instance running on secondary node (%s),"
7427 " updating config" % target_node)
7428 instance.primary_node = target_node
7429 self.cfg.Update(instance, self.feedback_fn)
7430 demoted_node = source_node
7432 self.feedback_fn("* instance confirmed to be running on its"
7433 " primary node (%s)" % source_node)
7434 demoted_node = target_node
7436 if instance.disk_template in constants.DTS_INT_MIRROR:
7437 self._EnsureSecondary(demoted_node)
7439 self._WaitUntilSync()
7440 except errors.OpExecError:
7441 # we ignore here errors, since if the device is standalone, it
7442 # won't be able to sync
7444 self._GoStandalone()
7445 self._GoReconnect(False)
7446 self._WaitUntilSync()
7448 self.feedback_fn("* done")
7450 def _RevertDiskStatus(self):
7451 """Try to revert the disk status after a failed migration.
7454 target_node = self.target_node
7455 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7459 self._EnsureSecondary(target_node)
7460 self._GoStandalone()
7461 self._GoReconnect(False)
7462 self._WaitUntilSync()
7463 except errors.OpExecError, err:
7464 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7465 " please try to recover the instance manually;"
7466 " error '%s'" % str(err))
7468 def _AbortMigration(self):
7469 """Call the hypervisor code to abort a started migration.
7472 instance = self.instance
7473 target_node = self.target_node
7474 source_node = self.source_node
7475 migration_info = self.migration_info
7477 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7481 abort_msg = abort_result.fail_msg
7483 logging.error("Aborting migration failed on target node %s: %s",
7484 target_node, abort_msg)
7485       # Don't raise an exception here, as we still have to try to revert the
7486 # disk status, even if this step failed.
7488 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7489 instance, False, self.live)
7490 abort_msg = abort_result.fail_msg
7492 logging.error("Aborting migration failed on source node %s: %s",
7493 source_node, abort_msg)
7495 def _ExecMigration(self):
7496 """Migrate an instance.
7498 The migrate is done by:
7499 - change the disks into dual-master mode
7500 - wait until disks are fully synchronized again
7501 - migrate the instance
7502 - change disks on the new secondary node (the old primary) to secondary
7503 - wait until disks are fully synchronized
7504 - change disks into single-master mode
7507 instance = self.instance
7508 target_node = self.target_node
7509 source_node = self.source_node
7511 # Check for hypervisor version mismatch and warn the user.
7512 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7513 None, self.instance.hypervisor)
7514 src_info = nodeinfo[source_node]
7515 dst_info = nodeinfo[target_node]
7517 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7518 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7519 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7520 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7521 if src_version != dst_version:
7522 self.feedback_fn("* warning: hypervisor version mismatch between"
7523 " source (%s) and target (%s) node" %
7524 (src_version, dst_version))
7526 self.feedback_fn("* checking disk consistency between source and target")
7527 for dev in instance.disks:
7528 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7529 raise errors.OpExecError("Disk %s is degraded or not fully"
7530 " synchronized on target node,"
7531 " aborting migration" % dev.iv_name)
7533 # First get the migration information from the remote node
7534 result = self.rpc.call_migration_info(source_node, instance)
7535 msg = result.fail_msg
7537 log_err = ("Failed fetching source migration information from %s: %s" %
7539 logging.error(log_err)
7540 raise errors.OpExecError(log_err)
7542 self.migration_info = migration_info = result.payload
7544 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7545 # Then switch the disks to master/master mode
7546 self._EnsureSecondary(target_node)
7547 self._GoStandalone()
7548 self._GoReconnect(True)
7549 self._WaitUntilSync()
7551 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7552 result = self.rpc.call_accept_instance(target_node,
7555 self.nodes_ip[target_node])
7557 msg = result.fail_msg
7559 logging.error("Instance pre-migration failed, trying to revert"
7560 " disk status: %s", msg)
7561 self.feedback_fn("Pre-migration failed, aborting")
7562 self._AbortMigration()
7563 self._RevertDiskStatus()
7564 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7565 (instance.name, msg))
7567 self.feedback_fn("* migrating instance to %s" % target_node)
7568 result = self.rpc.call_instance_migrate(source_node, instance,
7569 self.nodes_ip[target_node],
7571 msg = result.fail_msg
7573 logging.error("Instance migration failed, trying to revert"
7574 " disk status: %s", msg)
7575 self.feedback_fn("Migration failed, aborting")
7576 self._AbortMigration()
7577 self._RevertDiskStatus()
7578 raise errors.OpExecError("Could not migrate instance %s: %s" %
7579 (instance.name, msg))
7581 self.feedback_fn("* starting memory transfer")
7582 last_feedback = time.time()
7584 result = self.rpc.call_instance_get_migration_status(source_node,
7586 msg = result.fail_msg
7587 ms = result.payload # MigrationStatus instance
7588 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7589 logging.error("Instance migration failed, trying to revert"
7590 " disk status: %s", msg)
7591 self.feedback_fn("Migration failed, aborting")
7592 self._AbortMigration()
7593 self._RevertDiskStatus()
7594 raise errors.OpExecError("Could not migrate instance %s: %s" %
7595 (instance.name, msg))
7597 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7598 self.feedback_fn("* memory transfer complete")
7601 if (utils.TimeoutExpired(last_feedback,
7602 self._MIGRATION_FEEDBACK_INTERVAL) and
7603 ms.transferred_ram is not None):
7604 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7605 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7606 last_feedback = time.time()
7608 time.sleep(self._MIGRATION_POLL_INTERVAL)
7610 result = self.rpc.call_instance_finalize_migration_src(source_node,
7614 msg = result.fail_msg
7616 logging.error("Instance migration succeeded, but finalization failed"
7617 " on the source node: %s", msg)
7618 raise errors.OpExecError("Could not finalize instance migration: %s" %
7621 instance.primary_node = target_node
7623 # distribute new instance config to the other nodes
7624 self.cfg.Update(instance, self.feedback_fn)
7626 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7630 msg = result.fail_msg
7632 logging.error("Instance migration succeeded, but finalization failed"
7633 " on the target node: %s", msg)
7634 raise errors.OpExecError("Could not finalize instance migration: %s" %
7637 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7638 self._EnsureSecondary(source_node)
7639 self._WaitUntilSync()
7640 self._GoStandalone()
7641 self._GoReconnect(False)
7642 self._WaitUntilSync()
7644 self.feedback_fn("* done")
7646 def _ExecFailover(self):
7647 """Failover an instance.
7649 The failover is done by shutting the instance down on its present node and
7650 starting it on the secondary node.
7653 instance = self.instance
7654 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7656 source_node = instance.primary_node
7657 target_node = self.target_node
7659 if instance.admin_up:
7660 self.feedback_fn("* checking disk consistency between source and target")
7661 for dev in instance.disks:
7662 # for drbd, these are drbd over lvm
7663 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7664 if primary_node.offline:
7665 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7667 (primary_node.name, dev.iv_name, target_node))
7668 elif not self.ignore_consistency:
7669 raise errors.OpExecError("Disk %s is degraded on target node,"
7670 " aborting failover" % dev.iv_name)
7672 self.feedback_fn("* not checking disk consistency as instance is not"
7675 self.feedback_fn("* shutting down instance on source node")
7676 logging.info("Shutting down instance %s on node %s",
7677 instance.name, source_node)
7679 result = self.rpc.call_instance_shutdown(source_node, instance,
7680 self.shutdown_timeout)
7681 msg = result.fail_msg
7683 if self.ignore_consistency or primary_node.offline:
7684 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7685 " proceeding anyway; please make sure node"
7686 " %s is down; error details: %s",
7687 instance.name, source_node, source_node, msg)
7689 raise errors.OpExecError("Could not shutdown instance %s on"
7691 (instance.name, source_node, msg))
7693 self.feedback_fn("* deactivating the instance's disks on source node")
7694 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7695 raise errors.OpExecError("Can't shut down the instance's disks")
7697 instance.primary_node = target_node
7698 # distribute new instance config to the other nodes
7699 self.cfg.Update(instance, self.feedback_fn)
7701 # Only start the instance if it's marked as up
7702 if instance.admin_up:
7703 self.feedback_fn("* activating the instance's disks on target node %s" %
7705 logging.info("Starting instance %s on node %s",
7706 instance.name, target_node)
7708 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7709 ignore_secondaries=True)
7711 _ShutdownInstanceDisks(self.lu, instance)
7712 raise errors.OpExecError("Can't activate the instance's disks")
7714 self.feedback_fn("* starting the instance on the target node %s" %
7716 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7718 msg = result.fail_msg
7720 _ShutdownInstanceDisks(self.lu, instance)
7721 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7722 (instance.name, target_node, msg))
7724 def Exec(self, feedback_fn):
7725 """Perform the migration.
7728 self.feedback_fn = feedback_fn
7729 self.source_node = self.instance.primary_node
7731 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7732 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7733 self.target_node = self.instance.secondary_nodes[0]
7734 # Otherwise self.target_node has been populated either
7735 # directly, or through an iallocator.
7737 self.all_nodes = [self.source_node, self.target_node]
7738 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7739 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7742 feedback_fn("Failover instance %s" % self.instance.name)
7743 self._ExecFailover()
7745 feedback_fn("Migrating instance %s" % self.instance.name)
7748 return self._ExecCleanup()
7750 return self._ExecMigration()
7753 def _CreateBlockDev(lu, node, instance, device, force_create,
7755 """Create a tree of block devices on a given node.
7757 If this device type has to be created on secondaries, create it and
7760 If not, just recurse to children keeping the same 'force' value.
7762 @param lu: the lu on whose behalf we execute
7763 @param node: the node on which to create the device
7764 @type instance: L{objects.Instance}
7765 @param instance: the instance which owns the device
7766 @type device: L{objects.Disk}
7767 @param device: the device to create
7768 @type force_create: boolean
7769 @param force_create: whether to force creation of this device; this
7770 will be changed to True whenever we find a device which has the
7771 CreateOnSecondary() attribute
7772 @param info: the extra 'metadata' we should attach to the device
7773 (this will be represented as an LVM tag)
7774 @type force_open: boolean
7775 @param force_open: this parameter will be passed to the
7776 L{backend.BlockdevCreate} function where it specifies
7777 whether we run on primary or not, and it affects both
7778 the child assembly and the device's own Open() execution
7781 if device.CreateOnSecondary():
7785 for child in device.children:
7786 _CreateBlockDev(lu, node, instance, child, force_create,
7789 if not force_create:
7792 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
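# Illustrative use of _CreateBlockDev (a sketch mirroring the per-node loop in
# _CreateDisks further below; all names come from that function): force_create
# and force_open are True only on the primary node, so secondaries only get the
# devices that declare CreateOnSecondary().
#
#   for node in all_nodes:
#     f_create = node == pnode
#     _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)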
7795 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7796 """Create a single block device on a given node.
7798 This will not recurse over children of the device, so they must be
7801 @param lu: the lu on whose behalf we execute
7802 @param node: the node on which to create the device
7803 @type instance: L{objects.Instance}
7804 @param instance: the instance which owns the device
7805 @type device: L{objects.Disk}
7806 @param device: the device to create
7807 @param info: the extra 'metadata' we should attach to the device
7808 (this will be represented as an LVM tag)
7809 @type force_open: boolean
7810 @param force_open: this parameter will be passed to the
7811 L{backend.BlockdevCreate} function where it specifies
7812 whether we run on primary or not, and it affects both
7813 the child assembly and the device's own Open() execution
7816 lu.cfg.SetDiskID(device, node)
7817 result = lu.rpc.call_blockdev_create(node, device, device.size,
7818 instance.name, force_open, info)
7819 result.Raise("Can't create block device %s on"
7820 " node %s for instance %s" % (device, node, instance.name))
7821 if device.physical_id is None:
7822 device.physical_id = result.payload
7825 def _GenerateUniqueNames(lu, exts):
7826 """Generate a suitable LV name.
7828 This will generate logical volume names for the given instance.
7833 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7834 results.append("%s%s" % (new_id, val))
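# For example (illustrative values only): a call such as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# would return something like
#   ["3c7d1b2e-....disk0", "9f4a8c01-....disk1"]
# i.e. one freshly generated unique ID per requested extension.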
7838 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7839 iv_name, p_minor, s_minor):
7840 """Generate a drbd8 device complete with its children.
7843 assert len(vgnames) == len(names) == 2
7844 port = lu.cfg.AllocatePort()
7845 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7846 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7847 logical_id=(vgnames[0], names[0]))
7848 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7849 logical_id=(vgnames[1], names[1]))
7850 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7851 logical_id=(primary, secondary, port,
7854 children=[dev_data, dev_meta],
7859 def _GenerateDiskTemplate(lu, template_name,
7860 instance_name, primary_node,
7861 secondary_nodes, disk_info,
7862 file_storage_dir, file_driver,
7863 base_index, feedback_fn):
7864 """Generate the entire disk layout for a given template type.
7867 # TODO: compute space requirements
7869 vgname = lu.cfg.GetVGName()
7870 disk_count = len(disk_info)
7872 if template_name == constants.DT_DISKLESS:
7874 elif template_name == constants.DT_PLAIN:
7875 if len(secondary_nodes) != 0:
7876 raise errors.ProgrammerError("Wrong template configuration")
7878 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7879 for i in range(disk_count)])
7880 for idx, disk in enumerate(disk_info):
7881 disk_index = idx + base_index
7882 vg = disk.get(constants.IDISK_VG, vgname)
7883 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7884 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7885 size=disk[constants.IDISK_SIZE],
7886 logical_id=(vg, names[idx]),
7887 iv_name="disk/%d" % disk_index,
7888 mode=disk[constants.IDISK_MODE])
7889 disks.append(disk_dev)
7890 elif template_name == constants.DT_DRBD8:
7891 if len(secondary_nodes) != 1:
7892 raise errors.ProgrammerError("Wrong template configuration")
7893 remote_node = secondary_nodes[0]
7894 minors = lu.cfg.AllocateDRBDMinor(
7895 [primary_node, remote_node] * len(disk_info), instance_name)
7898 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7899 for i in range(disk_count)]):
7900 names.append(lv_prefix + "_data")
7901 names.append(lv_prefix + "_meta")
7902 for idx, disk in enumerate(disk_info):
7903 disk_index = idx + base_index
7904 data_vg = disk.get(constants.IDISK_VG, vgname)
7905 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7906 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7907 disk[constants.IDISK_SIZE],
7909 names[idx * 2:idx * 2 + 2],
7910 "disk/%d" % disk_index,
7911 minors[idx * 2], minors[idx * 2 + 1])
7912 disk_dev.mode = disk[constants.IDISK_MODE]
7913 disks.append(disk_dev)
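# A sketch of the object tree built above for one DRBD8-backed disk (values
# are illustrative; the tail of logical_id is assumed to carry the two DRBD
# minors and the shared secret, per the _GenerateDRBD8Branch arguments):
#
#   LD_DRBD8  iv_name="disk/0", size=<size>,
#             logical_id=(primary_node, remote_node, port, p_minor, s_minor, secret)
#     children:
#       LD_LV  (data_vg, "<uuid>.disk0_data"), size=<size>
#       LD_LV  (meta_vg, "<uuid>.disk0_meta"), size=DRBD_META_SIZE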
7914 elif template_name == constants.DT_FILE:
7915 if len(secondary_nodes) != 0:
7916 raise errors.ProgrammerError("Wrong template configuration")
7918 opcodes.RequireFileStorage()
7920 for idx, disk in enumerate(disk_info):
7921 disk_index = idx + base_index
7922 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7923 size=disk[constants.IDISK_SIZE],
7924 iv_name="disk/%d" % disk_index,
7925 logical_id=(file_driver,
7926 "%s/disk%d" % (file_storage_dir,
7928 mode=disk[constants.IDISK_MODE])
7929 disks.append(disk_dev)
7930 elif template_name == constants.DT_SHARED_FILE:
7931 if len(secondary_nodes) != 0:
7932 raise errors.ProgrammerError("Wrong template configuration")
7934 opcodes.RequireSharedFileStorage()
7936 for idx, disk in enumerate(disk_info):
7937 disk_index = idx + base_index
7938 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7939 size=disk[constants.IDISK_SIZE],
7940 iv_name="disk/%d" % disk_index,
7941 logical_id=(file_driver,
7942 "%s/disk%d" % (file_storage_dir,
7944 mode=disk[constants.IDISK_MODE])
7945 disks.append(disk_dev)
7946 elif template_name == constants.DT_BLOCK:
7947 if len(secondary_nodes) != 0:
7948 raise errors.ProgrammerError("Wrong template configuration")
7950 for idx, disk in enumerate(disk_info):
7951 disk_index = idx + base_index
7952 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7953 size=disk[constants.IDISK_SIZE],
7954 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7955 disk[constants.IDISK_ADOPT]),
7956 iv_name="disk/%d" % disk_index,
7957 mode=disk[constants.IDISK_MODE])
7958 disks.append(disk_dev)
7961 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7965 def _GetInstanceInfoText(instance):
7966 Compute the text that should be added to the disk's metadata.
7969 return "originstname+%s" % instance.name
7972 def _CalcEta(time_taken, written, total_size):
7973 """Calculates the ETA based on size written and total size.
7975 @param time_taken: The time taken so far
7976 @param written: amount written so far
7977 @param total_size: The total size of data to be written
7978 @return: The remaining time in seconds
7981 avg_time = time_taken / float(written)
7982 return (total_size - written) * avg_time
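# Worked example for _CalcEta (illustrative numbers): if 1024 MiB out of
# 4096 MiB were written in 30 seconds, the average cost is 30/1024 s per MiB,
# so the remaining 3072 MiB are estimated at (4096 - 1024) * 30 / 1024 = 90 s:
#
#   >>> _CalcEta(30.0, 1024, 4096)
#   90.0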
7985 def _WipeDisks(lu, instance):
7986 """Wipes instance disks.
7988 @type lu: L{LogicalUnit}
7989 @param lu: the logical unit on whose behalf we execute
7990 @type instance: L{objects.Instance}
7991 @param instance: the instance whose disks we should wipe
7992 @return: the success of the wipe
7995 node = instance.primary_node
7997 for device in instance.disks:
7998 lu.cfg.SetDiskID(device, node)
8000 logging.info("Pause sync of instance %s disks", instance.name)
8001 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8003 for idx, success in enumerate(result.payload):
8005 logging.warn("pause-sync of instance %s for disk %d failed",
8009 for idx, device in enumerate(instance.disks):
8010 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
8011 # most MAX_WIPE_CHUNK
8012 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8013 constants.MIN_WIPE_CHUNK_PERCENT)
8014 # we _must_ make this an int, otherwise rounding errors will
8016 wipe_chunk_size = int(wipe_chunk_size)
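# Worked example, assuming the usual values MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB (assumptions for illustration only): a 20480 MiB
# disk gives 20480 / 100.0 * 10 = 2048 MiB, capped to min(1024, 2048) = 1024,
# so that disk is wiped in 1024 MiB chunks.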
8018 lu.LogInfo("* Wiping disk %d", idx)
8019 logging.info("Wiping disk %d for instance %s, node %s using"
8020 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8025 start_time = time.time()
8027 while offset < size:
8028 wipe_size = min(wipe_chunk_size, size - offset)
8029 logging.debug("Wiping disk %d, offset %s, chunk %s",
8030 idx, offset, wipe_size)
8031 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8032 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8033 (idx, offset, wipe_size))
8036 if now - last_output >= 60:
8037 eta = _CalcEta(now - start_time, offset, size)
8038 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8039 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8042 logging.info("Resume sync of instance %s disks", instance.name)
8044 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8046 for idx, success in enumerate(result.payload):
8048 lu.LogWarning("Resume sync of disk %d failed, please have a"
8049 " look at the status and troubleshoot the issue", idx)
8050 logging.warn("resume-sync of instance %s for disk %d failed",
8054 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8055 """Create all disks for an instance.
8057 This abstracts away some work from AddInstance.
8059 @type lu: L{LogicalUnit}
8060 @param lu: the logical unit on whose behalf we execute
8061 @type instance: L{objects.Instance}
8062 @param instance: the instance whose disks we should create
8064 @param to_skip: list of indices to skip
8065 @type target_node: string
8066 @param target_node: if passed, overrides the target node for creation
8068 @return: the success of the creation
8071 info = _GetInstanceInfoText(instance)
8072 if target_node is None:
8073 pnode = instance.primary_node
8074 all_nodes = instance.all_nodes
8079 if instance.disk_template in constants.DTS_FILEBASED:
8080 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8081 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8083 result.Raise("Failed to create directory '%s' on"
8084 " node %s" % (file_storage_dir, pnode))
8086 # Note: this needs to be kept in sync with adding of disks in
8087 # LUInstanceSetParams
8088 for idx, device in enumerate(instance.disks):
8089 if to_skip and idx in to_skip:
8091 logging.info("Creating volume %s for instance %s",
8092 device.iv_name, instance.name)
8094 for node in all_nodes:
8095 f_create = node == pnode
8096 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8099 def _RemoveDisks(lu, instance, target_node=None):
8100 """Remove all disks for an instance.
8102 This abstracts away some work from `AddInstance()` and
8103 `RemoveInstance()`. Note that in case some of the devices couldn't
8104 be removed, the removal will continue with the other ones (compare
8105 with `_CreateDisks()`).
8107 @type lu: L{LogicalUnit}
8108 @param lu: the logical unit on whose behalf we execute
8109 @type instance: L{objects.Instance}
8110 @param instance: the instance whose disks we should remove
8111 @type target_node: string
8112 @param target_node: used to override the node on which to remove the disks
8114 @return: the success of the removal
8117 logging.info("Removing block devices for instance %s", instance.name)
8120 for device in instance.disks:
8122 edata = [(target_node, device)]
8124 edata = device.ComputeNodeTree(instance.primary_node)
8125 for node, disk in edata:
8126 lu.cfg.SetDiskID(disk, node)
8127 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8129 lu.LogWarning("Could not remove block device %s on node %s,"
8130 " continuing anyway: %s", device.iv_name, node, msg)
8133 if instance.disk_template == constants.DT_FILE:
8134 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8138 tgt = instance.primary_node
8139 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8141 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8142 file_storage_dir, instance.primary_node, result.fail_msg)
8148 def _ComputeDiskSizePerVG(disk_template, disks):
8149 """Compute disk size requirements in the volume group
8152 def _compute(disks, payload):
8153 """Universal algorithm.
8158 vgs[disk[constants.IDISK_VG]] = \
8159 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8163 # Required free disk space as a function of disk and swap space
8165 constants.DT_DISKLESS: {},
8166 constants.DT_PLAIN: _compute(disks, 0),
8167 # 128 MB is added for DRBD metadata for each disk
8168 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8169 constants.DT_FILE: {},
8170 constants.DT_SHARED_FILE: {},
8173 if disk_template not in req_size_dict:
8174 raise errors.ProgrammerError("Disk template '%s' size requirement"
8175 " is unknown" % disk_template)
8177 return req_size_dict[disk_template]
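# Example result of _ComputeDiskSizePerVG (VG names are illustrative): for two
# DRBD8 disks of 10240 MiB, one in VG "xenvg" and one in VG "data", this would
# return per-VG requirements of
#   {"xenvg": 10240 + DRBD_META_SIZE, "data": 10240 + DRBD_META_SIZE}
# i.e. the free space that must be available in each volume group.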
8180 def _ComputeDiskSize(disk_template, disks):
8181 """Compute disk size requirements in the volume group
8184 # Required free disk space as a function of disk and swap space
8186 constants.DT_DISKLESS: None,
8187 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8188 # 128 MB is added for DRBD metadata for each disk
8190 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8191 constants.DT_FILE: None,
8192 constants.DT_SHARED_FILE: 0,
8193 constants.DT_BLOCK: 0,
8196 if disk_template not in req_size_dict:
8197 raise errors.ProgrammerError("Disk template '%s' size requirement"
8198 " is unknown" % disk_template)
8200 return req_size_dict[disk_template]
8203 def _FilterVmNodes(lu, nodenames):
8204 """Filters out non-vm_capable nodes from a list.
8206 @type lu: L{LogicalUnit}
8207 @param lu: the logical unit for which we check
8208 @type nodenames: list
8209 @param nodenames: the list of nodes on which we should check
8211 @return: the list of vm-capable nodes
8214 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8215 return [name for name in nodenames if name not in vm_nodes]
8218 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8219 """Hypervisor parameter validation.
8221 This function abstracts the hypervisor parameter validation to be
8222 used in both instance create and instance modify.
8224 @type lu: L{LogicalUnit}
8225 @param lu: the logical unit for which we check
8226 @type nodenames: list
8227 @param nodenames: the list of nodes on which we should check
8228 @type hvname: string
8229 @param hvname: the name of the hypervisor we should use
8230 @type hvparams: dict
8231 @param hvparams: the parameters which we need to check
8232 @raise errors.OpPrereqError: if the parameters are not valid
8235 nodenames = _FilterVmNodes(lu, nodenames)
8237 cluster = lu.cfg.GetClusterInfo()
8238 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8240 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8241 for node in nodenames:
8245 info.Raise("Hypervisor parameter validation failed on node %s" % node)
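# Typical invocation of _CheckHVParams, mirroring the instance-create path
# further below (nodenames is the primary node plus any secondaries):
#
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)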
8248 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8249 """OS parameters validation.
8251 @type lu: L{LogicalUnit}
8252 @param lu: the logical unit for which we check
8253 @type required: boolean
8254 @param required: whether the validation should fail if the OS is not
8256 @type nodenames: list
8257 @param nodenames: the list of nodes on which we should check
8258 @type osname: string
8259 @param osname: the name of the OS we should use
8260 @type osparams: dict
8261 @param osparams: the parameters which we need to check
8262 @raise errors.OpPrereqError: if the parameters are not valid
8265 nodenames = _FilterVmNodes(lu, nodenames)
8266 result = lu.rpc.call_os_validate(nodenames, required, osname,
8267 [constants.OS_VALIDATE_PARAMETERS],
8269 for node, nres in result.items():
8270 # we don't check for offline cases since this should be run only
8271 # against the master node and/or an instance's nodes
8272 nres.Raise("OS Parameters validation failed on node %s" % node)
8273 if not nres.payload:
8274 lu.LogInfo("OS %s not found on node %s, validation skipped",
8278 class LUInstanceCreate(LogicalUnit):
8279 """Create an instance.
8282 HPATH = "instance-add"
8283 HTYPE = constants.HTYPE_INSTANCE
8286 def CheckArguments(self):
8290 # do not require name_check to ease forward/backward compatibility
8292 if self.op.no_install and self.op.start:
8293 self.LogInfo("No-installation mode selected, disabling startup")
8294 self.op.start = False
8295 # validate/normalize the instance name
8296 self.op.instance_name = \
8297 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8299 if self.op.ip_check and not self.op.name_check:
8300 # TODO: make the ip check more flexible and not depend on the name check
8301 raise errors.OpPrereqError("Cannot do IP address check without a name"
8302 " check", errors.ECODE_INVAL)
8304 # check nics' parameter names
8305 for nic in self.op.nics:
8306 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8308 # check disks: parameter names and consistent adopt/no-adopt strategy
8309 has_adopt = has_no_adopt = False
8310 for disk in self.op.disks:
8311 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8312 if constants.IDISK_ADOPT in disk:
8316 if has_adopt and has_no_adopt:
8317 raise errors.OpPrereqError("Either all disks are adopted or none is",
8320 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8321 raise errors.OpPrereqError("Disk adoption is not supported for the"
8322 " '%s' disk template" %
8323 self.op.disk_template,
8325 if self.op.iallocator is not None:
8326 raise errors.OpPrereqError("Disk adoption not allowed with an"
8327 " iallocator script", errors.ECODE_INVAL)
8328 if self.op.mode == constants.INSTANCE_IMPORT:
8329 raise errors.OpPrereqError("Disk adoption not allowed for"
8330 " instance import", errors.ECODE_INVAL)
8332 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8333 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8334 " but no 'adopt' parameter given" %
8335 self.op.disk_template,
8338 self.adopt_disks = has_adopt
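# Illustrative disk specifications (keys are the constants.IDISK_* names used
# below in CheckPrereq; sizes and names are assumptions for the example): a
# plain creation uses e.g.
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR}
# while LV adoption names an existing volume instead of allocating a new one:
#   {constants.IDISK_SIZE: 10240, constants.IDISK_ADOPT: "existing-lv",
#    constants.IDISK_VG: "xenvg"}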
8340 # instance name verification
8341 if self.op.name_check:
8342 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8343 self.op.instance_name = self.hostname1.name
8344 # used in CheckPrereq for ip ping check
8345 self.check_ip = self.hostname1.ip
8347 self.check_ip = None
8349 # file storage checks
8350 if (self.op.file_driver and
8351 not self.op.file_driver in constants.FILE_DRIVER):
8352 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8353 self.op.file_driver, errors.ECODE_INVAL)
8355 if self.op.disk_template == constants.DT_FILE:
8356 opcodes.RequireFileStorage()
8357 elif self.op.disk_template == constants.DT_SHARED_FILE:
8358 opcodes.RequireSharedFileStorage()
8360 ### Node/iallocator related checks
8361 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8363 if self.op.pnode is not None:
8364 if self.op.disk_template in constants.DTS_INT_MIRROR:
8365 if self.op.snode is None:
8366 raise errors.OpPrereqError("The networked disk templates need"
8367 " a mirror node", errors.ECODE_INVAL)
8369 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8371 self.op.snode = None
8373 self._cds = _GetClusterDomainSecret()
8375 if self.op.mode == constants.INSTANCE_IMPORT:
8376 # On import force_variant must be True, because if we forced it at
8377 # initial install, our only chance when importing it back is that it
8379 self.op.force_variant = True
8381 if self.op.no_install:
8382 self.LogInfo("No-installation mode has no effect during import")
8384 elif self.op.mode == constants.INSTANCE_CREATE:
8385 if self.op.os_type is None:
8386 raise errors.OpPrereqError("No guest OS specified",
8388 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8389 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8390 " installation" % self.op.os_type,
8392 if self.op.disk_template is None:
8393 raise errors.OpPrereqError("No disk template specified",
8396 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8397 # Check handshake to ensure both clusters have the same domain secret
8398 src_handshake = self.op.source_handshake
8399 if not src_handshake:
8400 raise errors.OpPrereqError("Missing source handshake",
8403 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8406 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8409 # Load and check source CA
8410 self.source_x509_ca_pem = self.op.source_x509_ca
8411 if not self.source_x509_ca_pem:
8412 raise errors.OpPrereqError("Missing source X509 CA",
8416 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8418 except OpenSSL.crypto.Error, err:
8419 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8420 (err, ), errors.ECODE_INVAL)
8422 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8423 if errcode is not None:
8424 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8427 self.source_x509_ca = cert
8429 src_instance_name = self.op.source_instance_name
8430 if not src_instance_name:
8431 raise errors.OpPrereqError("Missing source instance name",
8434 self.source_instance_name = \
8435 netutils.GetHostname(name=src_instance_name).name
8438 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8439 self.op.mode, errors.ECODE_INVAL)
8441 def ExpandNames(self):
8442 """ExpandNames for CreateInstance.
8444 Figure out the right locks for instance creation.
8447 self.needed_locks = {}
8449 instance_name = self.op.instance_name
8450 # this is just a preventive check, but someone might still add this
8451 # instance in the meantime, and creation will fail at lock-add time
8452 if instance_name in self.cfg.GetInstanceList():
8453 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8454 instance_name, errors.ECODE_EXISTS)
8456 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8458 if self.op.iallocator:
8459 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8461 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8462 nodelist = [self.op.pnode]
8463 if self.op.snode is not None:
8464 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8465 nodelist.append(self.op.snode)
8466 self.needed_locks[locking.LEVEL_NODE] = nodelist
8468 # in case of import lock the source node too
8469 if self.op.mode == constants.INSTANCE_IMPORT:
8470 src_node = self.op.src_node
8471 src_path = self.op.src_path
8473 if src_path is None:
8474 self.op.src_path = src_path = self.op.instance_name
8476 if src_node is None:
8477 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8478 self.op.src_node = None
8479 if os.path.isabs(src_path):
8480 raise errors.OpPrereqError("Importing an instance from a path"
8481 " requires a source node option",
8484 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8485 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8486 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8487 if not os.path.isabs(src_path):
8488 self.op.src_path = src_path = \
8489 utils.PathJoin(constants.EXPORT_DIR, src_path)
8491 def _RunAllocator(self):
8492 """Run the allocator based on input opcode.
8495 nics = [n.ToDict() for n in self.nics]
8496 ial = IAllocator(self.cfg, self.rpc,
8497 mode=constants.IALLOCATOR_MODE_ALLOC,
8498 name=self.op.instance_name,
8499 disk_template=self.op.disk_template,
8502 vcpus=self.be_full[constants.BE_VCPUS],
8503 memory=self.be_full[constants.BE_MEMORY],
8506 hypervisor=self.op.hypervisor,
8509 ial.Run(self.op.iallocator)
8512 raise errors.OpPrereqError("Can't compute nodes using"
8513 " iallocator '%s': %s" %
8514 (self.op.iallocator, ial.info),
8516 if len(ial.result) != ial.required_nodes:
8517 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8518 " of nodes (%s), required %s" %
8519 (self.op.iallocator, len(ial.result),
8520 ial.required_nodes), errors.ECODE_FAULT)
8521 self.op.pnode = ial.result[0]
8522 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8523 self.op.instance_name, self.op.iallocator,
8524 utils.CommaJoin(ial.result))
8525 if ial.required_nodes == 2:
8526 self.op.snode = ial.result[1]
8528 def BuildHooksEnv(self):
8531 This runs on master, primary and secondary nodes of the instance.
8535 "ADD_MODE": self.op.mode,
8537 if self.op.mode == constants.INSTANCE_IMPORT:
8538 env["SRC_NODE"] = self.op.src_node
8539 env["SRC_PATH"] = self.op.src_path
8540 env["SRC_IMAGES"] = self.src_images
8542 env.update(_BuildInstanceHookEnv(
8543 name=self.op.instance_name,
8544 primary_node=self.op.pnode,
8545 secondary_nodes=self.secondaries,
8546 status=self.op.start,
8547 os_type=self.op.os_type,
8548 memory=self.be_full[constants.BE_MEMORY],
8549 vcpus=self.be_full[constants.BE_VCPUS],
8550 nics=_NICListToTuple(self, self.nics),
8551 disk_template=self.op.disk_template,
8552 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8553 for d in self.disks],
8556 hypervisor_name=self.op.hypervisor,
8562 def BuildHooksNodes(self):
8563 """Build hooks nodes.
8566 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8569 def _ReadExportInfo(self):
8570 """Reads the export information from disk.
8572 It will override the opcode source node and path with the actual
8573 information, if these two were not specified before.
8575 @return: the export information
8578 assert self.op.mode == constants.INSTANCE_IMPORT
8580 src_node = self.op.src_node
8581 src_path = self.op.src_path
8583 if src_node is None:
8584 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8585 exp_list = self.rpc.call_export_list(locked_nodes)
8587 for node in exp_list:
8588 if exp_list[node].fail_msg:
8590 if src_path in exp_list[node].payload:
8592 self.op.src_node = src_node = node
8593 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8597 raise errors.OpPrereqError("No export found for relative path %s" %
8598 src_path, errors.ECODE_INVAL)
8600 _CheckNodeOnline(self, src_node)
8601 result = self.rpc.call_export_info(src_node, src_path)
8602 result.Raise("No export or invalid export found in dir %s" % src_path)
8604 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8605 if not export_info.has_section(constants.INISECT_EXP):
8606 raise errors.ProgrammerError("Corrupted export config",
8607 errors.ECODE_ENVIRON)
8609 ei_version = export_info.get(constants.INISECT_EXP, "version")
8610 if (int(ei_version) != constants.EXPORT_VERSION):
8611 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8612 (ei_version, constants.EXPORT_VERSION),
8613 errors.ECODE_ENVIRON)
8616 def _ReadExportParams(self, einfo):
8617 """Use export parameters as defaults.
8619 In case the opcode doesn't specify (as in override) some instance
8620 parameters, then try to use them from the export information, if
8624 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8626 if self.op.disk_template is None:
8627 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8628 self.op.disk_template = einfo.get(constants.INISECT_INS,
8630 if self.op.disk_template not in constants.DISK_TEMPLATES:
8631 raise errors.OpPrereqError("Disk template specified in configuration"
8632 " file is not one of the allowed values:"
8633 " %s" % " ".join(constants.DISK_TEMPLATES))
8635 raise errors.OpPrereqError("No disk template specified and the export"
8636 " is missing the disk_template information",
8639 if not self.op.disks:
8641 # TODO: import the disk iv_name too
8642 for idx in range(constants.MAX_DISKS):
8643 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8644 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8645 disks.append({constants.IDISK_SIZE: disk_sz})
8646 self.op.disks = disks
8647 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8648 raise errors.OpPrereqError("No disk info specified and the export"
8649 " is missing the disk information",
8652 if not self.op.nics:
8654 for idx in range(constants.MAX_NICS):
8655 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8657 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8658 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8665 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8666 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8668 if (self.op.hypervisor is None and
8669 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8670 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8672 if einfo.has_section(constants.INISECT_HYP):
8673 # use the export parameters but do not override the ones
8674 # specified by the user
8675 for name, value in einfo.items(constants.INISECT_HYP):
8676 if name not in self.op.hvparams:
8677 self.op.hvparams[name] = value
8679 if einfo.has_section(constants.INISECT_BEP):
8680 # use the parameters, without overriding
8681 for name, value in einfo.items(constants.INISECT_BEP):
8682 if name not in self.op.beparams:
8683 self.op.beparams[name] = value
8685 # try to read the parameters old style, from the main section
8686 for name in constants.BES_PARAMETERS:
8687 if (name not in self.op.beparams and
8688 einfo.has_option(constants.INISECT_INS, name)):
8689 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8691 if einfo.has_section(constants.INISECT_OSP):
8692 # use the parameters, without overriding
8693 for name, value in einfo.items(constants.INISECT_OSP):
8694 if name not in self.op.osparams:
8695 self.op.osparams[name] = value
8697 def _RevertToDefaults(self, cluster):
8698 """Revert the instance parameters to the default values.
8702 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8703 for name in self.op.hvparams.keys():
8704 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8705 del self.op.hvparams[name]
8707 be_defs = cluster.SimpleFillBE({})
8708 for name in self.op.beparams.keys():
8709 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8710 del self.op.beparams[name]
8712 nic_defs = cluster.SimpleFillNIC({})
8713 for nic in self.op.nics:
8714 for name in constants.NICS_PARAMETERS:
8715 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8718 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8719 for name in self.op.osparams.keys():
8720 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8721 del self.op.osparams[name]
8723 def _CalculateFileStorageDir(self):
8724 """Calculate final instance file storage dir.
8727 # file storage dir calculation/check
8728 self.instance_file_storage_dir = None
8729 if self.op.disk_template in constants.DTS_FILEBASED:
8730 # build the full file storage dir path
8733 if self.op.disk_template == constants.DT_SHARED_FILE:
8734 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8736 get_fsd_fn = self.cfg.GetFileStorageDir
8738 cfg_storagedir = get_fsd_fn()
8739 if not cfg_storagedir:
8740 raise errors.OpPrereqError("Cluster file storage dir not defined")
8741 joinargs.append(cfg_storagedir)
8743 if self.op.file_storage_dir is not None:
8744 joinargs.append(self.op.file_storage_dir)
8746 joinargs.append(self.op.instance_name)
8748 # pylint: disable=W0142
8749 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
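# For example (paths are illustrative): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir == "mydir" and an instance
# named "inst1.example.com", the resulting directory would be
# "/srv/ganeti/file-storage/mydir/inst1.example.com".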
8751 def CheckPrereq(self):
8752 """Check prerequisites.
8755 self._CalculateFileStorageDir()
8757 if self.op.mode == constants.INSTANCE_IMPORT:
8758 export_info = self._ReadExportInfo()
8759 self._ReadExportParams(export_info)
8761 if (not self.cfg.GetVGName() and
8762 self.op.disk_template not in constants.DTS_NOT_LVM):
8763 raise errors.OpPrereqError("Cluster does not support lvm-based"
8764 " instances", errors.ECODE_STATE)
8766 if (self.op.hypervisor is None or
8767 self.op.hypervisor == constants.VALUE_AUTO):
8768 self.op.hypervisor = self.cfg.GetHypervisorType()
8770 cluster = self.cfg.GetClusterInfo()
8771 enabled_hvs = cluster.enabled_hypervisors
8772 if self.op.hypervisor not in enabled_hvs:
8773 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8774 " cluster (%s)" % (self.op.hypervisor,
8775 ",".join(enabled_hvs)),
8778 # Check tag validity
8779 for tag in self.op.tags:
8780 objects.TaggableObject.ValidateTag(tag)
8782 # check hypervisor parameter syntax (locally)
8783 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8784 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8786 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8787 hv_type.CheckParameterSyntax(filled_hvp)
8788 self.hv_full = filled_hvp
8789 # check that we don't specify global parameters on an instance
8790 _CheckGlobalHvParams(self.op.hvparams)
8792 # fill and remember the beparams dict
8793 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8794 for param, value in self.op.beparams.iteritems():
8795 if value == constants.VALUE_AUTO:
8796 self.op.beparams[param] = default_beparams[param]
8797 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8798 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8800 # build os parameters
8801 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8803 # now that hvp/bep are in final format, let's reset to defaults,
8805 if self.op.identify_defaults:
8806 self._RevertToDefaults(cluster)
8810 for idx, nic in enumerate(self.op.nics):
8811 nic_mode_req = nic.get(constants.INIC_MODE, None)
8812 nic_mode = nic_mode_req
8813 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8814 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8816 # in routed mode, for the first nic, the default ip is 'auto'
8817 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8818 default_ip_mode = constants.VALUE_AUTO
8820 default_ip_mode = constants.VALUE_NONE
8822 # ip validity checks
8823 ip = nic.get(constants.INIC_IP, default_ip_mode)
8824 if ip is None or ip.lower() == constants.VALUE_NONE:
8826 elif ip.lower() == constants.VALUE_AUTO:
8827 if not self.op.name_check:
8828 raise errors.OpPrereqError("IP address set to auto but name checks"
8829 " have been skipped",
8831 nic_ip = self.hostname1.ip
8833 if not netutils.IPAddress.IsValid(ip):
8834 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8838 # TODO: check the ip address for uniqueness
8839 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8840 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8843 # MAC address verification
8844 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8845 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8846 mac = utils.NormalizeAndValidateMac(mac)
8849 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8850 except errors.ReservationError:
8851 raise errors.OpPrereqError("MAC address %s already in use"
8852 " in cluster" % mac,
8853 errors.ECODE_NOTUNIQUE)
8855 # Build nic parameters
8856 link = nic.get(constants.INIC_LINK, None)
8857 if link == constants.VALUE_AUTO:
8858 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8861 nicparams[constants.NIC_MODE] = nic_mode
8863 nicparams[constants.NIC_LINK] = link
8865 check_params = cluster.SimpleFillNIC(nicparams)
8866 objects.NIC.CheckParameterSyntax(check_params)
8867 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8869 # disk checks/pre-build
8870 default_vg = self.cfg.GetVGName()
8872 for disk in self.op.disks:
8873 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8874 if mode not in constants.DISK_ACCESS_SET:
8875 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8876 mode, errors.ECODE_INVAL)
8877 size = disk.get(constants.IDISK_SIZE, None)
8879 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8882 except (TypeError, ValueError):
8883 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8886 data_vg = disk.get(constants.IDISK_VG, default_vg)
8888 constants.IDISK_SIZE: size,
8889 constants.IDISK_MODE: mode,
8890 constants.IDISK_VG: data_vg,
8891 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8893 if constants.IDISK_ADOPT in disk:
8894 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8895 self.disks.append(new_disk)
8897 if self.op.mode == constants.INSTANCE_IMPORT:
8899 for idx in range(len(self.disks)):
8900 option = "disk%d_dump" % idx
8901 if export_info.has_option(constants.INISECT_INS, option):
8902 # FIXME: are the old OSes, disk sizes, etc. useful?
8903 export_name = export_info.get(constants.INISECT_INS, option)
8904 image = utils.PathJoin(self.op.src_path, export_name)
8905 disk_images.append(image)
8907 disk_images.append(False)
8909 self.src_images = disk_images
8911 old_name = export_info.get(constants.INISECT_INS, "name")
8912 if self.op.instance_name == old_name:
8913 for idx, nic in enumerate(self.nics):
8914 if nic.mac == constants.VALUE_AUTO:
8915 nic_mac_ini = "nic%d_mac" % idx
8916 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8918 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8920 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8921 if self.op.ip_check:
8922 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8923 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8924 (self.check_ip, self.op.instance_name),
8925 errors.ECODE_NOTUNIQUE)
8927 #### mac address generation
8928 # By generating the mac address here, both the allocator and the hooks get
8929 # the real final mac address rather than the 'auto' or 'generate' value.
8930 # There is a race condition between the generation and the instance object
8931 # creation, which means that we know the mac is valid now, but we're not
8932 # sure it will be when we actually add the instance. If things go bad
8933 # adding the instance will abort because of a duplicate mac, and the
8934 # creation job will fail.
8935 for nic in self.nics:
8936 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8937 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8941 if self.op.iallocator is not None:
8942 self._RunAllocator()
8944 #### node related checks
8946 # check primary node
8947 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8948 assert self.pnode is not None, \
8949 "Cannot retrieve locked node %s" % self.op.pnode
8951 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8952 pnode.name, errors.ECODE_STATE)
8954 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8955 pnode.name, errors.ECODE_STATE)
8956 if not pnode.vm_capable:
8957 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8958 " '%s'" % pnode.name, errors.ECODE_STATE)
8960 self.secondaries = []
8962 # mirror node verification
8963 if self.op.disk_template in constants.DTS_INT_MIRROR:
8964 if self.op.snode == pnode.name:
8965 raise errors.OpPrereqError("The secondary node cannot be the"
8966 " primary node", errors.ECODE_INVAL)
8967 _CheckNodeOnline(self, self.op.snode)
8968 _CheckNodeNotDrained(self, self.op.snode)
8969 _CheckNodeVmCapable(self, self.op.snode)
8970 self.secondaries.append(self.op.snode)
8972 nodenames = [pnode.name] + self.secondaries
8974 if not self.adopt_disks:
8975 # Check lv size requirements, if not adopting
8976 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8977 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8979 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8980 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8981 disk[constants.IDISK_ADOPT])
8982 for disk in self.disks])
8983 if len(all_lvs) != len(self.disks):
8984 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8986 for lv_name in all_lvs:
8988 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8989 # to ReserveLV use the same syntax
8990 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8991 except errors.ReservationError:
8992 raise errors.OpPrereqError("LV named %s used by another instance" %
8993 lv_name, errors.ECODE_NOTUNIQUE)
8995 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8996 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8998 node_lvs = self.rpc.call_lv_list([pnode.name],
8999 vg_names.payload.keys())[pnode.name]
9000 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9001 node_lvs = node_lvs.payload
9003 delta = all_lvs.difference(node_lvs.keys())
9005 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9006 utils.CommaJoin(delta),
9008 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9010 raise errors.OpPrereqError("Online logical volumes found, cannot"
9011 " adopt: %s" % utils.CommaJoin(online_lvs),
9013 # update the size of each disk based on what is found
9014 for dsk in self.disks:
9015 dsk[constants.IDISK_SIZE] = \
9016 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9017 dsk[constants.IDISK_ADOPT])][0]))
9019 elif self.op.disk_template == constants.DT_BLOCK:
9020 # Normalize and de-duplicate device paths
9021 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9022 for disk in self.disks])
9023 if len(all_disks) != len(self.disks):
9024 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9026 baddisks = [d for d in all_disks
9027 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9029 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9030 " cannot be adopted" %
9031 (", ".join(baddisks),
9032 constants.ADOPTABLE_BLOCKDEV_ROOT),
9035 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9036 list(all_disks))[pnode.name]
9037 node_disks.Raise("Cannot get block device information from node %s" %
9039 node_disks = node_disks.payload
9040 delta = all_disks.difference(node_disks.keys())
9042 raise errors.OpPrereqError("Missing block device(s): %s" %
9043 utils.CommaJoin(delta),
9045 for dsk in self.disks:
9046 dsk[constants.IDISK_SIZE] = \
9047 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9049 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9051 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9052 # check OS parameters (remotely)
9053 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9055 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9057 # memory check on primary node
9059 _CheckNodeFreeMemory(self, self.pnode.name,
9060 "creating instance %s" % self.op.instance_name,
9061 self.be_full[constants.BE_MEMORY],
9064 self.dry_run_result = list(nodenames)
9066 def Exec(self, feedback_fn):
9067 """Create and add the instance to the cluster.
9070 instance = self.op.instance_name
9071 pnode_name = self.pnode.name
9073 ht_kind = self.op.hypervisor
9074 if ht_kind in constants.HTS_REQ_PORT:
9075 network_port = self.cfg.AllocatePort()
9079 disks = _GenerateDiskTemplate(self,
9080 self.op.disk_template,
9081 instance, pnode_name,
9084 self.instance_file_storage_dir,
9085 self.op.file_driver,
9089 iobj = objects.Instance(name=instance, os=self.op.os_type,
9090 primary_node=pnode_name,
9091 nics=self.nics, disks=disks,
9092 disk_template=self.op.disk_template,
9094 network_port=network_port,
9095 beparams=self.op.beparams,
9096 hvparams=self.op.hvparams,
9097 hypervisor=self.op.hypervisor,
9098 osparams=self.op.osparams,
9102 for tag in self.op.tags:
9105 if self.adopt_disks:
9106 if self.op.disk_template == constants.DT_PLAIN:
9107 # rename LVs to the newly-generated names; we need to construct
9108 # 'fake' LV disks with the old data, plus the new unique_id
9109 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9111 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9112 rename_to.append(t_dsk.logical_id)
9113 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9114 self.cfg.SetDiskID(t_dsk, pnode_name)
9115 result = self.rpc.call_blockdev_rename(pnode_name,
9116 zip(tmp_disks, rename_to))
9117 result.Raise("Failed to rename adopted LVs")
9119 feedback_fn("* creating instance disks...")
9121 _CreateDisks(self, iobj)
9122 except errors.OpExecError:
9123 self.LogWarning("Device creation failed, reverting...")
9125 _RemoveDisks(self, iobj)
9127 self.cfg.ReleaseDRBDMinors(instance)
9130 feedback_fn("adding instance %s to cluster config" % instance)
9132 self.cfg.AddInstance(iobj, self.proc.GetECId())
9134 # Declare that we don't want to remove the instance lock anymore, as we've
9135 # added the instance to the config
9136 del self.remove_locks[locking.LEVEL_INSTANCE]
9138 if self.op.mode == constants.INSTANCE_IMPORT:
9139 # Release unused nodes
9140 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9143 _ReleaseLocks(self, locking.LEVEL_NODE)
9146 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9147 feedback_fn("* wiping instance disks...")
9149 _WipeDisks(self, iobj)
9150 except errors.OpExecError, err:
9151 logging.exception("Wiping disks failed")
9152 self.LogWarning("Wiping instance disks failed (%s)", err)
9156 # Something is already wrong with the disks, don't do anything else
9158 elif self.op.wait_for_sync:
9159 disk_abort = not _WaitForSync(self, iobj)
9160 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9161 # make sure the disks are not degraded (still sync-ing is ok)
9162 feedback_fn("* checking mirrors status")
9163 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9168 _RemoveDisks(self, iobj)
9169 self.cfg.RemoveInstance(iobj.name)
9170 # Make sure the instance lock gets removed
9171 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9172 raise errors.OpExecError("There are some degraded disks for"
9175 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9176 if self.op.mode == constants.INSTANCE_CREATE:
9177 if not self.op.no_install:
9178 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9179 not self.op.wait_for_sync)
9181 feedback_fn("* pausing disk sync to install instance OS")
9182 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9184 for idx, success in enumerate(result.payload):
9186 logging.warn("pause-sync of instance %s for disk %d failed",
9189 feedback_fn("* running the instance OS create scripts...")
9190 # FIXME: pass debug option from opcode to backend
9192 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9193 self.op.debug_level)
9195 feedback_fn("* resuming disk sync")
9196 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9198 for idx, success in enumerate(result.payload):
9200 logging.warn("resume-sync of instance %s for disk %d failed",
9203 os_add_result.Raise("Could not add os for instance %s"
9204 " on node %s" % (instance, pnode_name))
9206 elif self.op.mode == constants.INSTANCE_IMPORT:
9207 feedback_fn("* running the instance OS import scripts...")
9211 for idx, image in enumerate(self.src_images):
9215 # FIXME: pass debug option from opcode to backend
9216 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9217 constants.IEIO_FILE, (image, ),
9218 constants.IEIO_SCRIPT,
9219 (iobj.disks[idx], idx),
9221 transfers.append(dt)
9224 masterd.instance.TransferInstanceData(self, feedback_fn,
9225 self.op.src_node, pnode_name,
9226 self.pnode.secondary_ip,
9228 if not compat.all(import_result):
9229 self.LogWarning("Some disks for instance %s on node %s were not"
9230 " imported successfully" % (instance, pnode_name))
9232 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9233 feedback_fn("* preparing remote import...")
9234 # The source cluster will stop the instance before attempting to make a
9235 # connection. In some cases stopping an instance can take a long time,
9236 # hence the shutdown timeout is added to the connection timeout.
9237 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9238 self.op.source_shutdown_timeout)
9239 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9241 assert iobj.primary_node == self.pnode.name
9243 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9244 self.source_x509_ca,
9245 self._cds, timeouts)
9246 if not compat.all(disk_results):
9247 # TODO: Should the instance still be started, even if some disks
9248 # failed to import (valid for local imports, too)?
9249 self.LogWarning("Some disks for instance %s on node %s were not"
9250 " imported successfully" % (instance, pnode_name))
9252 # Run rename script on newly imported instance
9253 assert iobj.name == instance
9254 feedback_fn("Running rename script for %s" % instance)
9255 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9256 self.source_instance_name,
9257 self.op.debug_level)
9259 self.LogWarning("Failed to run rename script for %s on node"
9260 " %s: %s" % (instance, pnode_name, result.fail_msg))
9263 # also checked in the prereq part
9264 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9268 iobj.admin_up = True
9269 self.cfg.Update(iobj, feedback_fn)
9270 logging.info("Starting instance %s on node %s", instance, pnode_name)
9271 feedback_fn("* starting instance...")
9272 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9274 result.Raise("Could not start instance")
9276 return list(iobj.all_nodes)
9279 class LUInstanceConsole(NoHooksLU):
9280 """Connect to an instance's console.
9282 This is somewhat special in that it returns the command line that
9283 you need to run on the master node in order to connect to the
9289 def ExpandNames(self):
9290 self._ExpandAndLockInstance()
9292 def CheckPrereq(self):
9293 """Check prerequisites.
9295 This checks that the instance is in the cluster.
9298 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9299 assert self.instance is not None, \
9300 "Cannot retrieve locked instance %s" % self.op.instance_name
9301 _CheckNodeOnline(self, self.instance.primary_node)
9303 def Exec(self, feedback_fn):
9304 """Connect to the console of an instance
9307 instance = self.instance
9308 node = instance.primary_node
9310 node_insts = self.rpc.call_instance_list([node],
9311 [instance.hypervisor])[node]
9312 node_insts.Raise("Can't get node information from %s" % node)
9314 if instance.name not in node_insts.payload:
9315 if instance.admin_up:
9316 state = constants.INSTST_ERRORDOWN
9318 state = constants.INSTST_ADMINDOWN
9319 raise errors.OpExecError("Instance %s is not running (state %s)" %
9320 (instance.name, state))
9322 logging.debug("Connecting to console of %s on %s", instance.name, node)
9324 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9327 def _GetInstanceConsole(cluster, instance):
9328 """Returns console information for an instance.
9330 @type cluster: L{objects.Cluster}
9331 @type instance: L{objects.Instance}
9335 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9336 # beparams and hvparams are passed separately, to avoid editing the
9337 # instance and then saving the defaults in the instance itself.
9338 hvparams = cluster.FillHV(instance)
9339 beparams = cluster.FillBE(instance)
9340 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9342 assert console.instance == instance.name
9343 assert console.Validate()
9345 return console.ToDict()
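# Illustrative sketch (not part of the original module): the comment in
# _GetInstanceConsole above notes that hvparams and beparams are computed with
# cluster.FillHV/FillBE and passed separately, so the effective values
# (cluster defaults overlaid with per-instance overrides) are obtained without
# writing those defaults back into the instance object.  A minimal stand-alone
# model of that pattern, using plain dicts with made-up keys:
def _demo_fill_params(cluster_defaults, instance_overrides):
  """Return the effective parameters without mutating either input."""
  filled = dict(cluster_defaults)
  filled.update(instance_overrides)
  return filled

# Example: the instance keeps only its explicit overrides, while the merged
# view is a separate, throw-away dict:
#   _demo_fill_params({"param_a": 1, "param_b": 2}, {"param_b": 3})
#   -> {"param_a": 1, "param_b": 3}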
9348 class LUInstanceReplaceDisks(LogicalUnit):
9349 """Replace the disks of an instance.
9352 HPATH = "mirrors-replace"
9353 HTYPE = constants.HTYPE_INSTANCE
9356 def CheckArguments(self):
9357 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9360 def ExpandNames(self):
9361 self._ExpandAndLockInstance()
9363 assert locking.LEVEL_NODE not in self.needed_locks
9364 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9366 assert self.op.iallocator is None or self.op.remote_node is None, \
9367 "Conflicting options"
9369 if self.op.remote_node is not None:
9370 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9372 # Warning: do not remove the locking of the new secondary here
9373 # unless DRBD8.AddChildren is changed to work in parallel;
9374 # currently it doesn't since parallel invocations of
9375 # FindUnusedMinor will conflict
9376 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9377 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9379 self.needed_locks[locking.LEVEL_NODE] = []
9380 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9382 if self.op.iallocator is not None:
9383 # iallocator will select a new node in the same group
9384 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9386 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9387 self.op.iallocator, self.op.remote_node,
9388 self.op.disks, False, self.op.early_release)
9390 self.tasklets = [self.replacer]
9392 def DeclareLocks(self, level):
9393 if level == locking.LEVEL_NODEGROUP:
9394 assert self.op.remote_node is None
9395 assert self.op.iallocator is not None
9396 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9398 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9399 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9400 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9402 elif level == locking.LEVEL_NODE:
9403 if self.op.iallocator is not None:
9404 assert self.op.remote_node is None
9405 assert not self.needed_locks[locking.LEVEL_NODE]
9407 # Lock member nodes of all locked groups
9408 self.needed_locks[locking.LEVEL_NODE] = [node_name
9409 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9410 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9412 self._LockInstancesNodes()
9414 def BuildHooksEnv(self):
9417 This runs on the master, the primary and all the secondaries.
9420 instance = self.replacer.instance
9422 "MODE": self.op.mode,
9423 "NEW_SECONDARY": self.op.remote_node,
9424 "OLD_SECONDARY": instance.secondary_nodes[0],
9426 env.update(_BuildInstanceHookEnvByObject(self, instance))
9429 def BuildHooksNodes(self):
9430 """Build hooks nodes.
9433 instance = self.replacer.instance
9435 self.cfg.GetMasterNode(),
9436 instance.primary_node,
9438 if self.op.remote_node is not None:
9439 nl.append(self.op.remote_node)
9442 def CheckPrereq(self):
9443 """Check prerequisites.
9446 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9447 self.op.iallocator is None)
9449 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9451 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9453 return LogicalUnit.CheckPrereq(self)
9456 class TLReplaceDisks(Tasklet):
9457 """Replaces disks for an instance.
9459 Note: Locking is not within the scope of this class.
9462 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9463 disks, delay_iallocator, early_release):
9464 """Initializes this class.
9467 Tasklet.__init__(self, lu)
9470 self.instance_name = instance_name
9472 self.iallocator_name = iallocator_name
9473 self.remote_node = remote_node
9475 self.delay_iallocator = delay_iallocator
9476 self.early_release = early_release
9479 self.instance = None
9480 self.new_node = None
9481 self.target_node = None
9482 self.other_node = None
9483 self.remote_node_info = None
9484 self.node_secondary_ip = None
9487 def CheckArguments(mode, remote_node, iallocator):
9488 """Helper function for users of this class.
9491 # check for valid parameter combination
9492 if mode == constants.REPLACE_DISK_CHG:
9493 if remote_node is None and iallocator is None:
9494 raise errors.OpPrereqError("When changing the secondary either an"
9495 " iallocator script must be used or the"
9496 " new node given", errors.ECODE_INVAL)
9498 if remote_node is not None and iallocator is not None:
9499 raise errors.OpPrereqError("Give either the iallocator or the new"
9500 " secondary, not both", errors.ECODE_INVAL)
9502 elif remote_node is not None or iallocator is not None:
9503 # Not replacing the secondary
9504 raise errors.OpPrereqError("The iallocator and new node options can"
9505 " only be used when changing the"
9506 " secondary node", errors.ECODE_INVAL)
9509 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9510 """Compute a new secondary node using an IAllocator.
9513 ial = IAllocator(lu.cfg, lu.rpc,
9514 mode=constants.IALLOCATOR_MODE_RELOC,
9516 relocate_from=list(relocate_from))
9518 ial.Run(iallocator_name)
9521 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9522 " %s" % (iallocator_name, ial.info),
9525 if len(ial.result) != ial.required_nodes:
9526 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9527 " of nodes (%s), required %s" %
9529 len(ial.result), ial.required_nodes),
9532 remote_node_name = ial.result[0]
9534 lu.LogInfo("Selected new secondary for instance '%s': %s",
9535 instance_name, remote_node_name)
9537 return remote_node_name
9539 def _FindFaultyDisks(self, node_name):
9540 """Wrapper for L{_FindFaultyInstanceDisks}.
9543 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9546 def _CheckDisksActivated(self, instance):
9547 """Checks if the instance disks are activated.
9549 @param instance: The instance to check disks
9550 @return: True if they are activated, False otherwise
9553 nodes = instance.all_nodes
9555 for idx, dev in enumerate(instance.disks):
9557 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9558 self.cfg.SetDiskID(dev, node)
9560 result = self.rpc.call_blockdev_find(node, dev)
9564 elif result.fail_msg or not result.payload:
9569 def CheckPrereq(self):
9570 """Check prerequisites.
9572 This checks that the instance is in the cluster.
9575 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9576 assert instance is not None, \
9577 "Cannot retrieve locked instance %s" % self.instance_name
9579 if instance.disk_template != constants.DT_DRBD8:
9580 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9581 " instances", errors.ECODE_INVAL)
9583 if len(instance.secondary_nodes) != 1:
9584 raise errors.OpPrereqError("The instance has a strange layout,"
9585 " expected one secondary but found %d" %
9586 len(instance.secondary_nodes),
9589 if not self.delay_iallocator:
9590 self._CheckPrereq2()
9592 def _CheckPrereq2(self):
9593 """Check prerequisites, second part.
9595 This function should always be part of CheckPrereq. It was separated and is
9596 now called from Exec because during node evacuation iallocator was only
9597 called with an unmodified cluster model, not taking planned changes into
9601 instance = self.instance
9602 secondary_node = instance.secondary_nodes[0]
9604 if self.iallocator_name is None:
9605 remote_node = self.remote_node
9607 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9608 instance.name, instance.secondary_nodes)
9610 if remote_node is None:
9611 self.remote_node_info = None
9613 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9614 "Remote node '%s' is not locked" % remote_node
9616 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9617 assert self.remote_node_info is not None, \
9618 "Cannot retrieve locked node %s" % remote_node
9620 if remote_node == self.instance.primary_node:
9621 raise errors.OpPrereqError("The specified node is the primary node of"
9622 " the instance", errors.ECODE_INVAL)
9624 if remote_node == secondary_node:
9625 raise errors.OpPrereqError("The specified node is already the"
9626 " secondary node of the instance",
9629 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9630 constants.REPLACE_DISK_CHG):
9631 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9634 if self.mode == constants.REPLACE_DISK_AUTO:
9635 if not self._CheckDisksActivated(instance):
9636 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9637 " first" % self.instance_name,
9639 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9640 faulty_secondary = self._FindFaultyDisks(secondary_node)
9642 if faulty_primary and faulty_secondary:
9643 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9644 " one node and can not be repaired"
9645 " automatically" % self.instance_name,
9649 self.disks = faulty_primary
9650 self.target_node = instance.primary_node
9651 self.other_node = secondary_node
9652 check_nodes = [self.target_node, self.other_node]
9653 elif faulty_secondary:
9654 self.disks = faulty_secondary
9655 self.target_node = secondary_node
9656 self.other_node = instance.primary_node
9657 check_nodes = [self.target_node, self.other_node]
9663 # Non-automatic modes
9664 if self.mode == constants.REPLACE_DISK_PRI:
9665 self.target_node = instance.primary_node
9666 self.other_node = secondary_node
9667 check_nodes = [self.target_node, self.other_node]
9669 elif self.mode == constants.REPLACE_DISK_SEC:
9670 self.target_node = secondary_node
9671 self.other_node = instance.primary_node
9672 check_nodes = [self.target_node, self.other_node]
9674 elif self.mode == constants.REPLACE_DISK_CHG:
9675 self.new_node = remote_node
9676 self.other_node = instance.primary_node
9677 self.target_node = secondary_node
9678 check_nodes = [self.new_node, self.other_node]
9680 _CheckNodeNotDrained(self.lu, remote_node)
9681 _CheckNodeVmCapable(self.lu, remote_node)
9683 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9684 assert old_node_info is not None
9685 if old_node_info.offline and not self.early_release:
9686 # doesn't make sense to delay the release
9687 self.early_release = True
9688 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9689 " early-release mode", secondary_node)
9692 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9695 # If not specified all disks should be replaced
9697 self.disks = range(len(self.instance.disks))
9699 for node in check_nodes:
9700 _CheckNodeOnline(self.lu, node)
9702 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9705 if node_name is not None)
9707 # Release unneeded node locks
9708 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9710 # Release any owned node group
9711 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9712 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9714 # Check whether disks are valid
9715 for disk_idx in self.disks:
9716 instance.FindDisk(disk_idx)
9718 # Get secondary node IP addresses
9719 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9720 in self.cfg.GetMultiNodeInfo(touched_nodes))
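# Illustrative summary (added comment, not in the original source) of the node
# roles assigned by _CheckPrereq2 above for each replacement mode:
#
#   mode                target_node            other_node   new_node
#   REPLACE_DISK_PRI    primary                secondary    -
#   REPLACE_DISK_SEC    secondary              primary      -
#   REPLACE_DISK_CHG    secondary (old)        primary      remote/iallocator
#   REPLACE_DISK_AUTO   node w/ faulty disks   the peer     -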
9722 def Exec(self, feedback_fn):
9723 """Execute disk replacement.
9725 This dispatches the disk replacement to the appropriate handler.
9728 if self.delay_iallocator:
9729 self._CheckPrereq2()
9732 # Verify owned locks before starting operation
9733 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9734 assert set(owned_nodes) == set(self.node_secondary_ip), \
9735 ("Incorrect node locks, owning %s, expected %s" %
9736 (owned_nodes, self.node_secondary_ip.keys()))
9738 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9739 assert list(owned_instances) == [self.instance_name], \
9740 "Instance '%s' not locked" % self.instance_name
9742 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9743 "Should not own any node group lock at this point"
9746 feedback_fn("No disks need replacement")
9749 feedback_fn("Replacing disk(s) %s for %s" %
9750 (utils.CommaJoin(self.disks), self.instance.name))
9752 activate_disks = (not self.instance.admin_up)
9754 # Activate the instance disks if we're replacing them on a down instance
9756 _StartInstanceDisks(self.lu, self.instance, True)
9759 # Should we replace the secondary node?
9760 if self.new_node is not None:
9761 fn = self._ExecDrbd8Secondary
9763 fn = self._ExecDrbd8DiskOnly
9765 result = fn(feedback_fn)
9767 # Deactivate the instance disks if we're replacing them on a
9770 _SafeShutdownInstanceDisks(self.lu, self.instance)
9773 # Verify owned locks
9774 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9775 nodes = frozenset(self.node_secondary_ip)
9776 assert ((self.early_release and not owned_nodes) or
9777 (not self.early_release and not (set(owned_nodes) - nodes))), \
9778 ("Not owning the correct locks, early_release=%s, owned=%r,"
9779 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9783 def _CheckVolumeGroup(self, nodes):
9784 self.lu.LogInfo("Checking volume groups")
9786 vgname = self.cfg.GetVGName()
9788 # Make sure volume group exists on all involved nodes
9789 results = self.rpc.call_vg_list(nodes)
9791 raise errors.OpExecError("Can't list volume groups on the nodes")
9795 res.Raise("Error checking node %s" % node)
9796 if vgname not in res.payload:
9797 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9800 def _CheckDisksExistence(self, nodes):
9801 # Check disk existence
9802 for idx, dev in enumerate(self.instance.disks):
9803 if idx not in self.disks:
9807 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9808 self.cfg.SetDiskID(dev, node)
9810 result = self.rpc.call_blockdev_find(node, dev)
9812 msg = result.fail_msg
9813 if msg or not result.payload:
9815 msg = "disk not found"
9816 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9819 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9820 for idx, dev in enumerate(self.instance.disks):
9821 if idx not in self.disks:
9824 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9827 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9829 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9830 " replace disks for instance %s" %
9831 (node_name, self.instance.name))
9833 def _CreateNewStorage(self, node_name):
9834 """Create new storage on the primary or secondary node.
9836 This is only used for same-node replaces, not for changing the
9837 secondary node, hence we don't want to modify the existing disk.
9842 for idx, dev in enumerate(self.instance.disks):
9843 if idx not in self.disks:
9846 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9848 self.cfg.SetDiskID(dev, node_name)
9850 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9851 names = _GenerateUniqueNames(self.lu, lv_names)
9853 vg_data = dev.children[0].logical_id[0]
9854 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9855 logical_id=(vg_data, names[0]))
9856 vg_meta = dev.children[1].logical_id[0]
9857 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9858 logical_id=(vg_meta, names[1]))
9860 new_lvs = [lv_data, lv_meta]
9861 old_lvs = [child.Copy() for child in dev.children]
9862 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9864 # we pass force_create=True to force the LVM creation
9865 for new_lv in new_lvs:
9866 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9867 _GetInstanceInfoText(self.instance), False)
9871 def _CheckDevices(self, node_name, iv_names):
9872 for name, (dev, _, _) in iv_names.iteritems():
9873 self.cfg.SetDiskID(dev, node_name)
9875 result = self.rpc.call_blockdev_find(node_name, dev)
9877 msg = result.fail_msg
9878 if msg or not result.payload:
9880 msg = "disk not found"
9881 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9884 if result.payload.is_degraded:
9885 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9887 def _RemoveOldStorage(self, node_name, iv_names):
9888 for name, (_, old_lvs, _) in iv_names.iteritems():
9889 self.lu.LogInfo("Remove logical volumes for %s" % name)
9892 self.cfg.SetDiskID(lv, node_name)
9894 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9896 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9897 hint="remove unused LVs manually")
9899 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9900 """Replace a disk on the primary or secondary for DRBD 8.
9902 The algorithm for replace is quite complicated:
9904 1. for each disk to be replaced:
9906 1. create new LVs on the target node with unique names
9907 1. detach old LVs from the drbd device
9908 1. rename old LVs to name_replaced.<time_t>
9909 1. rename new LVs to old LVs
9910 1. attach the new LVs (with the old names now) to the drbd device
9912 1. wait for sync across all devices
9914 1. for each modified disk:
9916 1. remove old LVs (which have the name name_replaced.<time_t>)
9918 Failures are not very well handled.
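(See the illustrative sketch after this method for the LV rename steps.)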
9923 # Step: check device existence
9924 self.lu.LogStep(1, steps_total, "Check device existence")
9925 self._CheckDisksExistence([self.other_node, self.target_node])
9926 self._CheckVolumeGroup([self.target_node, self.other_node])
9928 # Step: check other node consistency
9929 self.lu.LogStep(2, steps_total, "Check peer consistency")
9930 self._CheckDisksConsistency(self.other_node,
9931 self.other_node == self.instance.primary_node,
9934 # Step: create new storage
9935 self.lu.LogStep(3, steps_total, "Allocate new storage")
9936 iv_names = self._CreateNewStorage(self.target_node)
9938 # Step: for each lv, detach+rename*2+attach
9939 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9940 for dev, old_lvs, new_lvs in iv_names.itervalues():
9941 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9943 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9945 result.Raise("Can't detach drbd from local storage on node"
9946 " %s for device %s" % (self.target_node, dev.iv_name))
9948 #cfg.Update(instance)
9950 # ok, we created the new LVs, so now we know we have the needed
9951 # storage; as such, we proceed on the target node to rename
9952 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9953 # using the assumption that logical_id == physical_id (which in
9954 # turn is the unique_id on that node)
9956 # FIXME(iustin): use a better name for the replaced LVs
9957 temp_suffix = int(time.time())
9958 ren_fn = lambda d, suff: (d.physical_id[0],
9959 d.physical_id[1] + "_replaced-%s" % suff)
9961 # Build the rename list based on what LVs exist on the node
9962 rename_old_to_new = []
9963 for to_ren in old_lvs:
9964 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9965 if not result.fail_msg and result.payload:
9967 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9969 self.lu.LogInfo("Renaming the old LVs on the target node")
9970 result = self.rpc.call_blockdev_rename(self.target_node,
9972 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9974 # Now we rename the new LVs to the old LVs
9975 self.lu.LogInfo("Renaming the new LVs on the target node")
9976 rename_new_to_old = [(new, old.physical_id)
9977 for old, new in zip(old_lvs, new_lvs)]
9978 result = self.rpc.call_blockdev_rename(self.target_node,
9980 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9982 # Intermediate steps of in memory modifications
9983 for old, new in zip(old_lvs, new_lvs):
9984 new.logical_id = old.logical_id
9985 self.cfg.SetDiskID(new, self.target_node)
9987 # We need to modify old_lvs so that removal later removes the
9988 # right LVs, not the newly added ones; note that old_lvs is a
9990 for disk in old_lvs:
9991 disk.logical_id = ren_fn(disk, temp_suffix)
9992 self.cfg.SetDiskID(disk, self.target_node)
9994 # Now that the new lvs have the old name, we can add them to the device
9995 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9996 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9998 msg = result.fail_msg
10000 for new_lv in new_lvs:
10001 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10004 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10005 hint=("cleanup manually the unused logical"
10007 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10010 if self.early_release:
10011 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10013 self._RemoveOldStorage(self.target_node, iv_names)
10014 # WARNING: we release both node locks here, do not do other RPCs
10015 # than WaitForSync to the primary node
10016 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10017 names=[self.target_node, self.other_node])
10020 # This can fail as the old devices are degraded and _WaitForSync
10021 # does a combined result over all disks, so we don't check its return value
10022 self.lu.LogStep(cstep, steps_total, "Sync devices")
10024 _WaitForSync(self.lu, self.instance)
10026 # Check all devices manually
10027 self._CheckDevices(self.instance.primary_node, iv_names)
10029 # Step: remove old storage
10030 if not self.early_release:
10031 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10033 self._RemoveOldStorage(self.target_node, iv_names)
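# Illustrative sketch (not part of the original code): the rename dance that
# _ExecDrbd8DiskOnly performs for every replaced disk, modelled on plain
# strings.  The helper is purely demonstrative; the names and the suffix
# format are simplified assumptions, not the exact LV names Ganeti generates.
def _demo_lv_rename_dance(old_lvs, new_lvs, timestamp):
  """Return (renamed_old, renamed_new) after the two rename passes.

  First the old LVs are moved out of the way to "<name>_replaced-<timestamp>",
  then the new LVs take over the old names, so the DRBD device can be
  re-attached to components whose names did not change.

  """
  renamed_old = ["%s_replaced-%s" % (name, timestamp) for name in old_lvs]
  assert len(new_lvs) == len(old_lvs), "One new LV per old LV expected"
  renamed_new = list(old_lvs)  # the new LVs now carry the old names
  return (renamed_old, renamed_new)

# Example:
#   _demo_lv_rename_dance([".disk0_data", ".disk0_meta"],
#                         ["uuid1.data", "uuid1.meta"], 1234567890)
#   -> ([".disk0_data_replaced-1234567890", ".disk0_meta_replaced-1234567890"],
#       [".disk0_data", ".disk0_meta"])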
10035 def _ExecDrbd8Secondary(self, feedback_fn):
10036 """Replace the secondary node for DRBD 8.
10038 The algorithm for replace is quite complicated:
10039 - for all disks of the instance:
10040 - create new LVs on the new node with same names
10041 - shutdown the drbd device on the old secondary
10042 - disconnect the drbd network on the primary
10043 - create the drbd device on the new secondary
10044 - network attach the drbd on the primary, using an artifice:
10045 the drbd code for Attach() will connect to the network if it
10046 finds a device which is connected to the good local disks but
10047 not network enabled
10048 - wait for sync across all devices
10049 - remove all disks from the old secondary
10051 Failures are not very well handled.
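(See the illustrative sketch after this method for how the new DRBD logical IDs are built.)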
10056 pnode = self.instance.primary_node
10058 # Step: check device existence
10059 self.lu.LogStep(1, steps_total, "Check device existence")
10060 self._CheckDisksExistence([self.instance.primary_node])
10061 self._CheckVolumeGroup([self.instance.primary_node])
10063 # Step: check other node consistency
10064 self.lu.LogStep(2, steps_total, "Check peer consistency")
10065 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10067 # Step: create new storage
10068 self.lu.LogStep(3, steps_total, "Allocate new storage")
10069 for idx, dev in enumerate(self.instance.disks):
10070 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10071 (self.new_node, idx))
10072 # we pass force_create=True to force LVM creation
10073 for new_lv in dev.children:
10074 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10075 _GetInstanceInfoText(self.instance), False)
10077 # Step 4: drbd minors and drbd setup changes
10078 # after this, we must manually remove the drbd minors on both the
10079 # error and the success paths
10080 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10081 minors = self.cfg.AllocateDRBDMinor([self.new_node
10082 for dev in self.instance.disks],
10083 self.instance.name)
10084 logging.debug("Allocated minors %r", minors)
10087 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10088 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10089 (self.new_node, idx))
10090 # create new devices on new_node; note that we create two IDs:
10091 # one without port, so the drbd will be activated without
10092 # networking information on the new node at this stage, and one
10093 # with network, for the later activation in step 4
10094 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10095 if self.instance.primary_node == o_node1:
10098 assert self.instance.primary_node == o_node2, "Three-node instance?"
10101 new_alone_id = (self.instance.primary_node, self.new_node, None,
10102 p_minor, new_minor, o_secret)
10103 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10104 p_minor, new_minor, o_secret)
10106 iv_names[idx] = (dev, dev.children, new_net_id)
10107 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10109 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10110 logical_id=new_alone_id,
10111 children=dev.children,
10114 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10115 _GetInstanceInfoText(self.instance), False)
10116 except errors.GenericError:
10117 self.cfg.ReleaseDRBDMinors(self.instance.name)
10120 # We have new devices, shutdown the drbd on the old secondary
10121 for idx, dev in enumerate(self.instance.disks):
10122 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10123 self.cfg.SetDiskID(dev, self.target_node)
10124 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10126 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10127 "node: %s" % (idx, msg),
10128 hint=("Please cleanup this device manually as"
10129 " soon as possible"))
10131 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10132 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10133 self.instance.disks)[pnode]
10135 msg = result.fail_msg
10137 # detaches didn't succeed (unlikely)
10138 self.cfg.ReleaseDRBDMinors(self.instance.name)
10139 raise errors.OpExecError("Can't detach the disks from the network on"
10140 " old node: %s" % (msg,))
10142 # if we managed to detach at least one, we update all the disks of
10143 # the instance to point to the new secondary
10144 self.lu.LogInfo("Updating instance configuration")
10145 for dev, _, new_logical_id in iv_names.itervalues():
10146 dev.logical_id = new_logical_id
10147 self.cfg.SetDiskID(dev, self.instance.primary_node)
10149 self.cfg.Update(self.instance, feedback_fn)
10151 # and now perform the drbd attach
10152 self.lu.LogInfo("Attaching primary drbds to new secondary"
10153 " (standalone => connected)")
10154 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10156 self.node_secondary_ip,
10157 self.instance.disks,
10158 self.instance.name,
10160 for to_node, to_result in result.items():
10161 msg = to_result.fail_msg
10163 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10165 hint=("please do a gnt-instance info to see the"
10166 " status of disks"))
10168 if self.early_release:
10169 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10171 self._RemoveOldStorage(self.target_node, iv_names)
10172 # WARNING: we release all node locks here, do not do other RPCs
10173 # than WaitForSync to the primary node
10174 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10175 names=[self.instance.primary_node,
10180 # This can fail as the old devices are degraded and _WaitForSync
10181 # does a combined result over all disks, so we don't check its return value
10182 self.lu.LogStep(cstep, steps_total, "Sync devices")
10184 _WaitForSync(self.lu, self.instance)
10186 # Check all devices manually
10187 self._CheckDevices(self.instance.primary_node, iv_names)
10189 # Step: remove old storage
10190 if not self.early_release:
10191 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10192 self._RemoveOldStorage(self.target_node, iv_names)
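# Illustrative sketch (not part of the original code): how _ExecDrbd8Secondary
# above derives the two new DRBD logical IDs from the existing one.  A DRBD8
# logical_id is the 6-tuple (node_a, node_b, port, minor_a, minor_b, secret);
# the first derived ID has no port, so the device can be brought up on the new
# node without networking, and the second, networked ID is used when the
# primary re-attaches.  This stand-alone helper mirrors that logic purely for
# demonstration.
def _demo_new_drbd_ids(old_id, primary_node, new_node, new_minor):
  """Return (alone_id, net_id) for a replacement secondary."""
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_id
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    assert primary_node == o_node2, "Three-node instance?"
    p_minor = o_minor2
  alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return (alone_id, net_id)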
10195 class LURepairNodeStorage(NoHooksLU):
10196 """Repairs the volume group on a node.
10201 def CheckArguments(self):
10202 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10204 storage_type = self.op.storage_type
10206 if (constants.SO_FIX_CONSISTENCY not in
10207 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10208 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10209 " repaired" % storage_type,
10210 errors.ECODE_INVAL)
10212 def ExpandNames(self):
10213 self.needed_locks = {
10214 locking.LEVEL_NODE: [self.op.node_name],
10217 def _CheckFaultyDisks(self, instance, node_name):
10218 """Ensure faulty disks abort the opcode or at least warn."""
10220 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10222 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10223 " node '%s'" % (instance.name, node_name),
10224 errors.ECODE_STATE)
10225 except errors.OpPrereqError, err:
10226 if self.op.ignore_consistency:
10227 self.proc.LogWarning(str(err.args[0]))
10231 def CheckPrereq(self):
10232 """Check prerequisites.
10235 # Check whether any instance on this node has faulty disks
10236 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10237 if not inst.admin_up:
10239 check_nodes = set(inst.all_nodes)
10240 check_nodes.discard(self.op.node_name)
10241 for inst_node_name in check_nodes:
10242 self._CheckFaultyDisks(inst, inst_node_name)
10244 def Exec(self, feedback_fn):
10245 feedback_fn("Repairing storage unit '%s' on %s ..." %
10246 (self.op.name, self.op.node_name))
10248 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10249 result = self.rpc.call_storage_execute(self.op.node_name,
10250 self.op.storage_type, st_args,
10252 constants.SO_FIX_CONSISTENCY)
10253 result.Raise("Failed to repair storage unit '%s' on %s" %
10254 (self.op.name, self.op.node_name))
10257 class LUNodeEvacuate(NoHooksLU):
10258 """Evacuates instances off a list of nodes.
10263 def CheckArguments(self):
10264 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10266 def ExpandNames(self):
10267 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10269 if self.op.remote_node is not None:
10270 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10271 assert self.op.remote_node
10273 if self.op.remote_node == self.op.node_name:
10274 raise errors.OpPrereqError("Can not use evacuated node as a new"
10275 " secondary node", errors.ECODE_INVAL)
10277 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10278 raise errors.OpPrereqError("Without the use of an iallocator only"
10279 " secondary instances can be evacuated",
10280 errors.ECODE_INVAL)
10283 self.share_locks = _ShareAll()
10284 self.needed_locks = {
10285 locking.LEVEL_INSTANCE: [],
10286 locking.LEVEL_NODEGROUP: [],
10287 locking.LEVEL_NODE: [],
10290 if self.op.remote_node is None:
10291 # Iallocator will choose any node(s) in the same group
10292 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10294 group_nodes = frozenset([self.op.remote_node])
10296 # Determine nodes to be locked
10297 self.lock_nodes = set([self.op.node_name]) | group_nodes
10299 def _DetermineInstances(self):
10300 """Builds list of instances to operate on.
10303 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10305 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10306 # Primary instances only
10307 inst_fn = _GetNodePrimaryInstances
10308 assert self.op.remote_node is None, \
10309 "Evacuating primary instances requires iallocator"
10310 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10311 # Secondary instances only
10312 inst_fn = _GetNodeSecondaryInstances
10315 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10316 inst_fn = _GetNodeInstances
10318 return inst_fn(self.cfg, self.op.node_name)
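# Added note (not in the original source): the evacuation mode selects the
# instances as follows: IALLOCATOR_NEVAC_PRI picks instances whose primary
# node is the evacuated node (and requires an iallocator),
# IALLOCATOR_NEVAC_SEC picks those that use it as secondary, and
# IALLOCATOR_NEVAC_ALL picks both sets combined.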
10320 def DeclareLocks(self, level):
10321 if level == locking.LEVEL_INSTANCE:
10322 # Lock instances optimistically, needs verification once node and group
10323 # locks have been acquired
10324 self.needed_locks[locking.LEVEL_INSTANCE] = \
10325 set(i.name for i in self._DetermineInstances())
10327 elif level == locking.LEVEL_NODEGROUP:
10328 # Lock node groups optimistically, needs verification once nodes have
10330 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10331 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10333 elif level == locking.LEVEL_NODE:
10334 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10336 def CheckPrereq(self):
10338 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10339 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10340 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10342 assert owned_nodes == self.lock_nodes
10344 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10345 if owned_groups != wanted_groups:
10346 raise errors.OpExecError("Node groups changed since locks were acquired,"
10347 " current groups are '%s', used to be '%s'" %
10348 (utils.CommaJoin(wanted_groups),
10349 utils.CommaJoin(owned_groups)))
10351 # Determine affected instances
10352 self.instances = self._DetermineInstances()
10353 self.instance_names = [i.name for i in self.instances]
10355 if set(self.instance_names) != owned_instances:
10356 raise errors.OpExecError("Instances on node '%s' changed since locks"
10357 " were acquired, current instances are '%s',"
10358 " used to be '%s'" %
10359 (self.op.node_name,
10360 utils.CommaJoin(self.instance_names),
10361 utils.CommaJoin(owned_instances)))
10363 if self.instance_names:
10364 self.LogInfo("Evacuating instances from node '%s': %s",
10366 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10368 self.LogInfo("No instances to evacuate from node '%s'",
10371 if self.op.remote_node is not None:
10372 for i in self.instances:
10373 if i.primary_node == self.op.remote_node:
10374 raise errors.OpPrereqError("Node %s is the primary node of"
10375 " instance %s, cannot use it as"
10377 (self.op.remote_node, i.name),
10378 errors.ECODE_INVAL)
10380 def Exec(self, feedback_fn):
10381 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10383 if not self.instance_names:
10384 # No instances to evacuate
10387 elif self.op.iallocator is not None:
10388 # TODO: Implement relocation to other group
10389 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10390 evac_mode=self.op.mode,
10391 instances=list(self.instance_names))
10393 ial.Run(self.op.iallocator)
10395 if not ial.success:
10396 raise errors.OpPrereqError("Can't compute node evacuation using"
10397 " iallocator '%s': %s" %
10398 (self.op.iallocator, ial.info),
10399 errors.ECODE_NORES)
10401 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10403 elif self.op.remote_node is not None:
10404 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10406 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10407 remote_node=self.op.remote_node,
10409 mode=constants.REPLACE_DISK_CHG,
10410 early_release=self.op.early_release)]
10411 for instance_name in self.instance_names
10415 raise errors.ProgrammerError("No iallocator or remote node")
10417 return ResultWithJobs(jobs)
10420 def _SetOpEarlyRelease(early_release, op):
10421 """Sets C{early_release} flag on opcodes if available.
10425 op.early_release = early_release
10426 except AttributeError:
10427 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10432 def _NodeEvacDest(use_nodes, group, nodes):
10433 """Returns group or nodes depending on caller's choice.
10437 return utils.CommaJoin(nodes)
10442 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10443 """Unpacks the result of change-group and node-evacuate iallocator requests.
10445 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10446 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10448 @type lu: L{LogicalUnit}
10449 @param lu: Logical unit instance
10450 @type alloc_result: tuple/list
10451 @param alloc_result: Result from iallocator
10452 @type early_release: bool
10453 @param early_release: Whether to release locks early if possible
10454 @type use_nodes: bool
10455 @param use_nodes: Whether to display node names instead of groups
10458 (moved, failed, jobs) = alloc_result
10461 lu.LogWarning("Unable to evacuate instances %s",
10462 utils.CommaJoin("%s (%s)" % (name, reason)
10463 for (name, reason) in failed))
10466 lu.LogInfo("Instances to be moved: %s",
10467 utils.CommaJoin("%s (to %s)" %
10468 (name, _NodeEvacDest(use_nodes, group, nodes))
10469 for (name, group, nodes) in moved))
10471 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10472 map(opcodes.OpCode.LoadOpCode, ops))
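# Illustrative sketch (added comment, not in the original source) of the shape
# of the iallocator result unpacked above; the concrete values are assumptions
# for demonstration only:
#
#   alloc_result = (
#     # moved: (instance name, target group, target nodes)
#     [("inst1.example.com", "default", ["node2.example.com"])],
#     # failed: (instance name, reason)
#     [("inst2.example.com", "instance has no secondary node")],
#     # jobs: one list of serialized opcodes per job to be submitted
#     [[<serialized opcode dict>, ...]],
#   )
#
# Each inner list in "jobs" becomes one job: its opcodes are deserialized with
# opcodes.OpCode.LoadOpCode and, where the opcode supports it, get the
# early_release flag set via _SetOpEarlyRelease.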
10476 class LUInstanceGrowDisk(LogicalUnit):
10477 """Grow a disk of an instance.
10480 HPATH = "disk-grow"
10481 HTYPE = constants.HTYPE_INSTANCE
10484 def ExpandNames(self):
10485 self._ExpandAndLockInstance()
10486 self.needed_locks[locking.LEVEL_NODE] = []
10487 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10489 def DeclareLocks(self, level):
10490 if level == locking.LEVEL_NODE:
10491 self._LockInstancesNodes()
10493 def BuildHooksEnv(self):
10494 """Build hooks env.
10496 This runs on the master, the primary and all the secondaries.
10500 "DISK": self.op.disk,
10501 "AMOUNT": self.op.amount,
10503 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10506 def BuildHooksNodes(self):
10507 """Build hooks nodes.
10510 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10513 def CheckPrereq(self):
10514 """Check prerequisites.
10516 This checks that the instance is in the cluster.
10519 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10520 assert instance is not None, \
10521 "Cannot retrieve locked instance %s" % self.op.instance_name
10522 nodenames = list(instance.all_nodes)
10523 for node in nodenames:
10524 _CheckNodeOnline(self, node)
10526 self.instance = instance
10528 if instance.disk_template not in constants.DTS_GROWABLE:
10529 raise errors.OpPrereqError("Instance's disk layout does not support"
10530 " growing", errors.ECODE_INVAL)
10532 self.disk = instance.FindDisk(self.op.disk)
10534 if instance.disk_template not in (constants.DT_FILE,
10535 constants.DT_SHARED_FILE):
10536 # TODO: check the free disk space for file, when that feature will be
10538 _CheckNodesFreeDiskPerVG(self, nodenames,
10539 self.disk.ComputeGrowth(self.op.amount))
10541 def Exec(self, feedback_fn):
10542 """Execute disk grow.
10545 instance = self.instance
10548 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10550 raise errors.OpExecError("Cannot activate block device to grow")
10552 # First run all grow ops in dry-run mode
10553 for node in instance.all_nodes:
10554 self.cfg.SetDiskID(disk, node)
10555 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10556 result.Raise("Grow request failed to node %s" % node)
10558 # We know that (as far as we can test) operations across different
10559 # nodes will succeed; time to run it for real
10560 for node in instance.all_nodes:
10561 self.cfg.SetDiskID(disk, node)
10562 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10563 result.Raise("Grow request failed to node %s" % node)
10565 # TODO: Rewrite code to work properly
10566 # DRBD goes into sync mode for a short amount of time after executing the
10567 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10568 # calling "resize" in sync mode fails. Sleeping for a short amount of
10569 # time is a work-around.
10572 disk.RecordGrow(self.op.amount)
10573 self.cfg.Update(instance, feedback_fn)
10574 if self.op.wait_for_sync:
10575 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10577 self.proc.LogWarning("Disk sync-ing has not returned a good"
10578 " status; please check the instance")
10579 if not instance.admin_up:
10580 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10581 elif not instance.admin_up:
10582 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10583 " not supposed to be running because no wait for"
10584 " sync mode was requested")
10587 class LUInstanceQueryData(NoHooksLU):
10588 """Query runtime instance data.
10593 def ExpandNames(self):
10594 self.needed_locks = {}
10596 # Use locking if requested or when non-static information is wanted
10597 if not (self.op.static or self.op.use_locking):
10598 self.LogWarning("Non-static data requested, locks need to be acquired")
10599 self.op.use_locking = True
10601 if self.op.instances or not self.op.use_locking:
10602 # Expand instance names right here
10603 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10605 # Will use acquired locks
10606 self.wanted_names = None
10608 if self.op.use_locking:
10609 self.share_locks = _ShareAll()
10611 if self.wanted_names is None:
10612 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10614 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10616 self.needed_locks[locking.LEVEL_NODE] = []
10617 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10619 def DeclareLocks(self, level):
10620 if self.op.use_locking and level == locking.LEVEL_NODE:
10621 self._LockInstancesNodes()
10623 def CheckPrereq(self):
10624 """Check prerequisites.
10626 This only checks the optional instance list against the existing names.
10629 if self.wanted_names is None:
10630 assert self.op.use_locking, "Locking was not used"
10631 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10633 self.wanted_instances = \
10634 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10636 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10637 """Returns the status of a block device
10640 if self.op.static or not node:
10643 self.cfg.SetDiskID(dev, node)
10645 result = self.rpc.call_blockdev_find(node, dev)
10649 result.Raise("Can't compute disk status for %s" % instance_name)
10651 status = result.payload
10655 return (status.dev_path, status.major, status.minor,
10656 status.sync_percent, status.estimated_time,
10657 status.is_degraded, status.ldisk_status)
10659 def _ComputeDiskStatus(self, instance, snode, dev):
10660 """Compute block device status.
10663 if dev.dev_type in constants.LDS_DRBD:
10664 # we change the snode then (otherwise we use the one passed in)
10665 if dev.logical_id[0] == instance.primary_node:
10666 snode = dev.logical_id[1]
10668 snode = dev.logical_id[0]
10670 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10671 instance.name, dev)
10672 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10675 dev_children = map(compat.partial(self._ComputeDiskStatus,
10682 "iv_name": dev.iv_name,
10683 "dev_type": dev.dev_type,
10684 "logical_id": dev.logical_id,
10685 "physical_id": dev.physical_id,
10686 "pstatus": dev_pstatus,
10687 "sstatus": dev_sstatus,
10688 "children": dev_children,
10693 def Exec(self, feedback_fn):
10694 """Gather and return data"""
10697 cluster = self.cfg.GetClusterInfo()
10699 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10700 for i in self.wanted_instances)
10701 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10702 if self.op.static or pnode.offline:
10703 remote_state = None
10705 self.LogWarning("Primary node %s is marked offline, returning static"
10706 " information only for instance %s" %
10707 (pnode.name, instance.name))
10709 remote_info = self.rpc.call_instance_info(instance.primary_node,
10711 instance.hypervisor)
10712 remote_info.Raise("Error checking node %s" % instance.primary_node)
10713 remote_info = remote_info.payload
10714 if remote_info and "state" in remote_info:
10715 remote_state = "up"
10717 remote_state = "down"
10719 if instance.admin_up:
10720 config_state = "up"
10722 config_state = "down"
10724 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10727 result[instance.name] = {
10728 "name": instance.name,
10729 "config_state": config_state,
10730 "run_state": remote_state,
10731 "pnode": instance.primary_node,
10732 "snodes": instance.secondary_nodes,
10734 # this happens to be the same format used for hooks
10735 "nics": _NICListToTuple(self, instance.nics),
10736 "disk_template": instance.disk_template,
10738 "hypervisor": instance.hypervisor,
10739 "network_port": instance.network_port,
10740 "hv_instance": instance.hvparams,
10741 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10742 "be_instance": instance.beparams,
10743 "be_actual": cluster.FillBE(instance),
10744 "os_instance": instance.osparams,
10745 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10746 "serial_no": instance.serial_no,
10747 "mtime": instance.mtime,
10748 "ctime": instance.ctime,
10749 "uuid": instance.uuid,
10755 class LUInstanceSetParams(LogicalUnit):
10756 """Modifies an instances's parameters.
10759 HPATH = "instance-modify"
10760 HTYPE = constants.HTYPE_INSTANCE
10763 def CheckArguments(self):
10764 if not (self.op.nics or self.op.disks or self.op.disk_template or
10765 self.op.hvparams or self.op.beparams or self.op.os_name):
10766 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10768 if self.op.hvparams:
10769 _CheckGlobalHvParams(self.op.hvparams)
10773 for disk_op, disk_dict in self.op.disks:
10774 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10775 if disk_op == constants.DDM_REMOVE:
10776 disk_addremove += 1
10778 elif disk_op == constants.DDM_ADD:
10779 disk_addremove += 1
10781 if not isinstance(disk_op, int):
10782 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10783 if not isinstance(disk_dict, dict):
10784 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10785 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10787 if disk_op == constants.DDM_ADD:
10788 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10789 if mode not in constants.DISK_ACCESS_SET:
10790 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10791 errors.ECODE_INVAL)
10792 size = disk_dict.get(constants.IDISK_SIZE, None)
10794 raise errors.OpPrereqError("Required disk parameter size missing",
10795 errors.ECODE_INVAL)
10798 except (TypeError, ValueError), err:
10799 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10800 str(err), errors.ECODE_INVAL)
10801 disk_dict[constants.IDISK_SIZE] = size
10803 # modification of disk
10804 if constants.IDISK_SIZE in disk_dict:
10805 raise errors.OpPrereqError("Disk size change not possible, use"
10806 " grow-disk", errors.ECODE_INVAL)
10808 if disk_addremove > 1:
10809 raise errors.OpPrereqError("Only one disk add or remove operation"
10810 " supported at a time", errors.ECODE_INVAL)
10812 if self.op.disks and self.op.disk_template is not None:
10813 raise errors.OpPrereqError("Disk template conversion and other disk"
10814 " changes not supported at the same time",
10815 errors.ECODE_INVAL)
10817 if (self.op.disk_template and
10818 self.op.disk_template in constants.DTS_INT_MIRROR and
10819 self.op.remote_node is None):
10820 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10821 " one requires specifying a secondary node",
10822 errors.ECODE_INVAL)
10826 for nic_op, nic_dict in self.op.nics:
10827 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10828 if nic_op == constants.DDM_REMOVE:
10831 elif nic_op == constants.DDM_ADD:
10834 if not isinstance(nic_op, int):
10835 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10836 if not isinstance(nic_dict, dict):
10837 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10838 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10840 # nic_dict should be a dict
10841 nic_ip = nic_dict.get(constants.INIC_IP, None)
10842 if nic_ip is not None:
10843 if nic_ip.lower() == constants.VALUE_NONE:
10844 nic_dict[constants.INIC_IP] = None
10846 if not netutils.IPAddress.IsValid(nic_ip):
10847 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10848 errors.ECODE_INVAL)
10850 nic_bridge = nic_dict.get("bridge", None)
10851 nic_link = nic_dict.get(constants.INIC_LINK, None)
10852 if nic_bridge and nic_link:
10853 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10854 " at the same time", errors.ECODE_INVAL)
10855 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10856 nic_dict["bridge"] = None
10857 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10858 nic_dict[constants.INIC_LINK] = None
10860 if nic_op == constants.DDM_ADD:
10861 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10862 if nic_mac is None:
10863 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10865 if constants.INIC_MAC in nic_dict:
10866 nic_mac = nic_dict[constants.INIC_MAC]
10867 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10868 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10870 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10871 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10872 " modifying an existing nic",
10873 errors.ECODE_INVAL)
10875 if nic_addremove > 1:
10876 raise errors.OpPrereqError("Only one NIC add or remove operation"
10877 " supported at a time", errors.ECODE_INVAL)
10879 def ExpandNames(self):
10880 self._ExpandAndLockInstance()
10881 self.needed_locks[locking.LEVEL_NODE] = []
10882 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10884 def DeclareLocks(self, level):
10885 if level == locking.LEVEL_NODE:
10886 self._LockInstancesNodes()
10887 if self.op.disk_template and self.op.remote_node:
10888 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10889 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10891 def BuildHooksEnv(self):
10892 """Build hooks env.
10894 This runs on the master, primary and secondaries.
10898 if constants.BE_MEMORY in self.be_new:
10899 args["memory"] = self.be_new[constants.BE_MEMORY]
10900 if constants.BE_VCPUS in self.be_new:
10901 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10902 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10903 # information at all.
10906 nic_override = dict(self.op.nics)
10907 for idx, nic in enumerate(self.instance.nics):
10908 if idx in nic_override:
10909 this_nic_override = nic_override[idx]
10911 this_nic_override = {}
10912 if constants.INIC_IP in this_nic_override:
10913 ip = this_nic_override[constants.INIC_IP]
10916 if constants.INIC_MAC in this_nic_override:
10917 mac = this_nic_override[constants.INIC_MAC]
10920 if idx in self.nic_pnew:
10921 nicparams = self.nic_pnew[idx]
10923 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10924 mode = nicparams[constants.NIC_MODE]
10925 link = nicparams[constants.NIC_LINK]
10926 args["nics"].append((ip, mac, mode, link))
10927 if constants.DDM_ADD in nic_override:
10928 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10929 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10930 nicparams = self.nic_pnew[constants.DDM_ADD]
10931 mode = nicparams[constants.NIC_MODE]
10932 link = nicparams[constants.NIC_LINK]
10933 args["nics"].append((ip, mac, mode, link))
10934 elif constants.DDM_REMOVE in nic_override:
10935 del args["nics"][-1]
10937 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10938 if self.op.disk_template:
10939 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10943 def BuildHooksNodes(self):
10944 """Build hooks nodes.
10947 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10950 def CheckPrereq(self):
10951 """Check prerequisites.
10953 This only checks the instance list against the existing names.
10956 # checking the new params on the primary/secondary nodes
10958 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10959 cluster = self.cluster = self.cfg.GetClusterInfo()
10960 assert self.instance is not None, \
10961 "Cannot retrieve locked instance %s" % self.op.instance_name
10962 pnode = instance.primary_node
10963 nodelist = list(instance.all_nodes)
10966 if self.op.os_name and not self.op.force:
10967 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10968 self.op.force_variant)
10969 instance_os = self.op.os_name
10971 instance_os = instance.os
10973 if self.op.disk_template:
10974 if instance.disk_template == self.op.disk_template:
10975 raise errors.OpPrereqError("Instance already has disk template %s" %
10976 instance.disk_template, errors.ECODE_INVAL)
10978 if (instance.disk_template,
10979 self.op.disk_template) not in self._DISK_CONVERSIONS:
10980 raise errors.OpPrereqError("Unsupported disk template conversion from"
10981 " %s to %s" % (instance.disk_template,
10982 self.op.disk_template),
10983 errors.ECODE_INVAL)
10984 _CheckInstanceDown(self, instance, "cannot change disk template")
10985 if self.op.disk_template in constants.DTS_INT_MIRROR:
10986 if self.op.remote_node == pnode:
10987 raise errors.OpPrereqError("Given new secondary node %s is the same"
10988 " as the primary node of the instance" %
10989 self.op.remote_node, errors.ECODE_STATE)
10990 _CheckNodeOnline(self, self.op.remote_node)
10991 _CheckNodeNotDrained(self, self.op.remote_node)
10992 # FIXME: here we assume that the old instance type is DT_PLAIN
10993 assert instance.disk_template == constants.DT_PLAIN
10994 disks = [{constants.IDISK_SIZE: d.size,
10995 constants.IDISK_VG: d.logical_id[0]}
10996 for d in instance.disks]
10997 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10998 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11000 # hvparams processing
11001 if self.op.hvparams:
11002 hv_type = instance.hypervisor
11003 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11004 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11005 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11008 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11009 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11010 self.hv_proposed = self.hv_new = hv_new # the new actual values
11011 self.hv_inst = i_hvdict # the new dict (without defaults)
11013 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11015 self.hv_new = self.hv_inst = {}
11017 # beparams processing
11018 if self.op.beparams:
11019 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11021 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11022 be_new = cluster.SimpleFillBE(i_bedict)
11023 self.be_proposed = self.be_new = be_new # the new actual values
11024 self.be_inst = i_bedict # the new dict (without defaults)
11026 self.be_new = self.be_inst = {}
11027 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11028 be_old = cluster.FillBE(instance)
11030 # CPU param validation -- checking every time a parameter is
11031 # changed to cover all cases where either CPU mask or vcpus have
11033 if (constants.BE_VCPUS in self.be_proposed and
11034 constants.HV_CPU_MASK in self.hv_proposed):
11036 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11037 # Verify mask is consistent with number of vCPUs. Can skip this
11038 # test if only 1 entry in the CPU mask, which means the same mask
11039 # is applied to all vCPUs.
11040 if (len(cpu_list) > 1 and
11041 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11042 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11044 (self.be_proposed[constants.BE_VCPUS],
11045 self.hv_proposed[constants.HV_CPU_MASK]),
11046 errors.ECODE_INVAL)
11048 # Only perform this test if a new CPU mask is given
11049 if constants.HV_CPU_MASK in self.hv_new:
11050 # Calculate the largest CPU number requested
11051 max_requested_cpu = max(map(max, cpu_list))
11052 # Check that all of the instance's nodes have enough physical CPUs to
11053 # satisfy the requested CPU mask
11054 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11055 max_requested_cpu + 1, instance.hypervisor)
11057 # osparams processing
11058 if self.op.osparams:
11059 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11060 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11061 self.os_inst = i_osdict # the new dict (without defaults)
11067 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11068 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11069 mem_check_list = [pnode]
11070 if be_new[constants.BE_AUTO_BALANCE]:
11071 # either we changed auto_balance to yes or it was from before
11072 mem_check_list.extend(instance.secondary_nodes)
11073 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11074 instance.hypervisor)
11075 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11076 instance.hypervisor)
11077 pninfo = nodeinfo[pnode]
11078 msg = pninfo.fail_msg
11080 # Assume the primary node is unreachable and go ahead
11081 self.warn.append("Can't get info from primary node %s: %s" %
11083 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11084 self.warn.append("Node data from primary node %s doesn't contain"
11085 " free memory information" % pnode)
11086 elif instance_info.fail_msg:
11087 self.warn.append("Can't get instance runtime information: %s" %
11088 instance_info.fail_msg)
11090 if instance_info.payload:
11091 current_mem = int(instance_info.payload["memory"])
11093 # Assume instance not running
11094 # (there is a slight race condition here, but it's not very probable,
11095 # and we have no other way to check)
11097 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11098 pninfo.payload["memory_free"])
11099 if miss_mem > 0:
11100 raise errors.OpPrereqError("This change will prevent the instance"
11101 " from starting, due to %d MB of memory"
11102 " missing on its primary node" % miss_mem,
11103 errors.ECODE_NORES)
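# Worked example (hypothetical numbers): raising BE_MEMORY to 4096 MiB for an
# instance currently using 2048 MiB on a primary node reporting 1024 MiB free
# gives miss_mem = 4096 - 2048 - 1024 = 1024 > 0, so the prerequisite check
# above refuses the change with ECODE_NORES.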
11105 if be_new[constants.BE_AUTO_BALANCE]:
11106 for node, nres in nodeinfo.items():
11107 if node not in instance.secondary_nodes:
11109 nres.Raise("Can't get info from secondary node %s" % node,
11110 prereq=True, ecode=errors.ECODE_STATE)
11111 if not isinstance(nres.payload.get("memory_free", None), int):
11112 raise errors.OpPrereqError("Secondary node %s didn't return free"
11113 " memory information" % node,
11114 errors.ECODE_STATE)
11115 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11116 raise errors.OpPrereqError("This change will prevent the instance"
11117 " from failover to its secondary node"
11118 " %s, due to not enough memory" % node,
11119 errors.ECODE_STATE)
11123 self.nic_pinst = {}
11124 for nic_op, nic_dict in self.op.nics:
11125 if nic_op == constants.DDM_REMOVE:
11126 if not instance.nics:
11127 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11128 errors.ECODE_INVAL)
11130 if nic_op != constants.DDM_ADD:
11132 if not instance.nics:
11133 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11134 " no NICs" % nic_op,
11135 errors.ECODE_INVAL)
11136 if nic_op < 0 or nic_op >= len(instance.nics):
11137 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11138 " are 0 to %d" %
11139 (nic_op, len(instance.nics) - 1),
11140 errors.ECODE_INVAL)
11141 old_nic_params = instance.nics[nic_op].nicparams
11142 old_nic_ip = instance.nics[nic_op].ip
11144 old_nic_params = {}
11147 update_params_dict = dict([(key, nic_dict[key])
11148 for key in constants.NICS_PARAMETERS
11149 if key in nic_dict])
11151 if "bridge" in nic_dict:
11152 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
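# Backwards-compatibility sketch (example value only): an old-style request
# such as {"bridge": "br0"} is folded into the modern parameter set as
# {constants.NIC_LINK: "br0"} before validation and filling with the
# cluster-level NIC defaults.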
11154 new_nic_params = _GetUpdatedParams(old_nic_params,
11155 update_params_dict)
11156 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11157 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11158 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11159 self.nic_pinst[nic_op] = new_nic_params
11160 self.nic_pnew[nic_op] = new_filled_nic_params
11161 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11163 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11164 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11165 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11167 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11169 self.warn.append(msg)
11171 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11172 if new_nic_mode == constants.NIC_MODE_ROUTED:
11173 if constants.INIC_IP in nic_dict:
11174 nic_ip = nic_dict[constants.INIC_IP]
11176 nic_ip = old_nic_ip
11178 raise errors.OpPrereqError("Cannot set the nic ip to None"
11179 " on a routed nic", errors.ECODE_INVAL)
11180 if constants.INIC_MAC in nic_dict:
11181 nic_mac = nic_dict[constants.INIC_MAC]
11182 if nic_mac is None:
11183 raise errors.OpPrereqError("Cannot set the nic mac to None",
11184 errors.ECODE_INVAL)
11185 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11186 # otherwise generate the mac
11187 nic_dict[constants.INIC_MAC] = \
11188 self.cfg.GenerateMAC(self.proc.GetECId())
11190 # or validate/reserve the current one
11192 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11193 except errors.ReservationError:
11194 raise errors.OpPrereqError("MAC address %s already in use"
11195 " in cluster" % nic_mac,
11196 errors.ECODE_NOTUNIQUE)
11199 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11200 raise errors.OpPrereqError("Disk operations not supported for"
11201 " diskless instances",
11202 errors.ECODE_INVAL)
11203 for disk_op, _ in self.op.disks:
11204 if disk_op == constants.DDM_REMOVE:
11205 if len(instance.disks) == 1:
11206 raise errors.OpPrereqError("Cannot remove the last disk of"
11207 " an instance", errors.ECODE_INVAL)
11208 _CheckInstanceDown(self, instance, "cannot remove disks")
11210 if (disk_op == constants.DDM_ADD and
11211 len(instance.disks) >= constants.MAX_DISKS):
11212 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11213 " add more" % constants.MAX_DISKS,
11214 errors.ECODE_STATE)
11215 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11217 if disk_op < 0 or disk_op >= len(instance.disks):
11218 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11219 " are 0 to %d" %
11220 (disk_op, len(instance.disks)),
11221 errors.ECODE_INVAL)
11225 def _ConvertPlainToDrbd(self, feedback_fn):
11226 """Converts an instance from plain to drbd.
11229 feedback_fn("Converting template to drbd")
11230 instance = self.instance
11231 pnode = instance.primary_node
11232 snode = self.op.remote_node
11234 # create a fake disk info for _GenerateDiskTemplate
11235 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11236 constants.IDISK_VG: d.logical_id[0]}
11237 for d in instance.disks]
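# The "fake" disk info above mirrors what instance creation would hand to
# _GenerateDiskTemplate; for a single 1024 MiB plain disk in VG "xenvg" it
# would look roughly like (values hypothetical):
#   [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw",
#     constants.IDISK_VG: "xenvg"}]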
11238 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11239 instance.name, pnode, [snode],
11240 disk_info, None, None, 0, feedback_fn)
11241 info = _GetInstanceInfoText(instance)
11242 feedback_fn("Creating additional volumes...")
11243 # first, create the missing data and meta devices
11244 for disk in new_disks:
11245 # unfortunately this is... not too nice
11246 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11248 for child in disk.children:
11249 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11250 # at this stage, all new LVs have been created, we can rename the
11251 # old ones
11252 feedback_fn("Renaming original volumes...")
11253 rename_list = [(o, n.children[0].logical_id)
11254 for (o, n) in zip(instance.disks, new_disks)]
11255 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11256 result.Raise("Failed to rename original LVs")
11258 feedback_fn("Initializing DRBD devices...")
11259 # all child devices are in place, we can now create the DRBD devices
11260 for disk in new_disks:
11261 for node in [pnode, snode]:
11262 f_create = node == pnode
11263 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11265 # at this point, the instance has been modified
11266 instance.disk_template = constants.DT_DRBD8
11267 instance.disks = new_disks
11268 self.cfg.Update(instance, feedback_fn)
11270 # disks are created, waiting for sync
11271 disk_abort = not _WaitForSync(self, instance,
11272 oneshot=not self.op.wait_for_sync)
11274 raise errors.OpExecError("There are some degraded disks for"
11275 " this instance, please cleanup manually")
11277 def _ConvertDrbdToPlain(self, feedback_fn):
11278 """Converts an instance from drbd to plain.
11281 instance = self.instance
11282 assert len(instance.secondary_nodes) == 1
11283 pnode = instance.primary_node
11284 snode = instance.secondary_nodes[0]
11285 feedback_fn("Converting template to plain")
11287 old_disks = instance.disks
11288 new_disks = [d.children[0] for d in old_disks]
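# Note: each DRBD8 top-level device has two LV children, the data volume
# first and the metadata volume second, so keeping d.children[0] retains the
# payload while the now-unneeded meta LVs are removed further below.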
11290 # copy over size and mode
11291 for parent, child in zip(old_disks, new_disks):
11292 child.size = parent.size
11293 child.mode = parent.mode
11295 # update instance structure
11296 instance.disks = new_disks
11297 instance.disk_template = constants.DT_PLAIN
11298 self.cfg.Update(instance, feedback_fn)
11300 feedback_fn("Removing volumes on the secondary node...")
11301 for disk in old_disks:
11302 self.cfg.SetDiskID(disk, snode)
11303 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11305 self.LogWarning("Could not remove block device %s on node %s,"
11306 " continuing anyway: %s", disk.iv_name, snode, msg)
11308 feedback_fn("Removing unneeded volumes on the primary node...")
11309 for idx, disk in enumerate(old_disks):
11310 meta = disk.children[1]
11311 self.cfg.SetDiskID(meta, pnode)
11312 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11314 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11315 " continuing anyway: %s", idx, pnode, msg)
11317 def Exec(self, feedback_fn):
11318 """Modifies an instance.
11320 All parameters take effect only at the next restart of the instance.
11323 # Process here the warnings from CheckPrereq, as we don't have a
11324 # feedback_fn there.
11325 for warn in self.warn:
11326 feedback_fn("WARNING: %s" % warn)
11329 instance = self.instance
11331 for disk_op, disk_dict in self.op.disks:
11332 if disk_op == constants.DDM_REMOVE:
11333 # remove the last disk
11334 device = instance.disks.pop()
11335 device_idx = len(instance.disks)
11336 for node, disk in device.ComputeNodeTree(instance.primary_node):
11337 self.cfg.SetDiskID(disk, node)
11338 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11340 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11341 " continuing anyway", device_idx, node, msg)
11342 result.append(("disk/%d" % device_idx, "remove"))
11343 elif disk_op == constants.DDM_ADD:
11345 if instance.disk_template in (constants.DT_FILE,
11346 constants.DT_SHARED_FILE):
11347 file_driver, file_path = instance.disks[0].logical_id
11348 file_path = os.path.dirname(file_path)
11350 file_driver = file_path = None
11351 disk_idx_base = len(instance.disks)
11352 new_disk = _GenerateDiskTemplate(self,
11353 instance.disk_template,
11354 instance.name, instance.primary_node,
11355 instance.secondary_nodes,
11359 disk_idx_base, feedback_fn)[0]
11360 instance.disks.append(new_disk)
11361 info = _GetInstanceInfoText(instance)
11363 logging.info("Creating volume %s for instance %s",
11364 new_disk.iv_name, instance.name)
11365 # Note: this needs to be kept in sync with _CreateDisks
11367 for node in instance.all_nodes:
11368 f_create = node == instance.primary_node
11370 _CreateBlockDev(self, node, instance, new_disk,
11371 f_create, info, f_create)
11372 except errors.OpExecError, err:
11373 self.LogWarning("Failed to create volume %s (%s) on"
11375 new_disk.iv_name, new_disk, node, err)
11376 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11377 (new_disk.size, new_disk.mode)))
11379 # change a given disk
11380 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11381 result.append(("disk.mode/%d" % disk_op,
11382 disk_dict[constants.IDISK_MODE]))
11384 if self.op.disk_template:
11385 r_shut = _ShutdownInstanceDisks(self, instance)
11387 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11388 " proceed with disk template conversion")
11389 mode = (instance.disk_template, self.op.disk_template)
11391 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11393 self.cfg.ReleaseDRBDMinors(instance.name)
11395 result.append(("disk_template", self.op.disk_template))
11398 for nic_op, nic_dict in self.op.nics:
11399 if nic_op == constants.DDM_REMOVE:
11400 # remove the last nic
11401 del instance.nics[-1]
11402 result.append(("nic.%d" % len(instance.nics), "remove"))
11403 elif nic_op == constants.DDM_ADD:
11404 # mac and bridge should be set by now
11405 mac = nic_dict[constants.INIC_MAC]
11406 ip = nic_dict.get(constants.INIC_IP, None)
11407 nicparams = self.nic_pinst[constants.DDM_ADD]
11408 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11409 instance.nics.append(new_nic)
11410 result.append(("nic.%d" % (len(instance.nics) - 1),
11411 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11412 (new_nic.mac, new_nic.ip,
11413 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11414 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11417 for key in (constants.INIC_MAC, constants.INIC_IP):
11418 if key in nic_dict:
11419 setattr(instance.nics[nic_op], key, nic_dict[key])
11420 if nic_op in self.nic_pinst:
11421 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11422 for key, val in nic_dict.iteritems():
11423 result.append(("nic.%s/%d" % (key, nic_op), val))
11426 if self.op.hvparams:
11427 instance.hvparams = self.hv_inst
11428 for key, val in self.op.hvparams.iteritems():
11429 result.append(("hv/%s" % key, val))
11432 if self.op.beparams:
11433 instance.beparams = self.be_inst
11434 for key, val in self.op.beparams.iteritems():
11435 result.append(("be/%s" % key, val))
11438 if self.op.os_name:
11439 instance.os = self.op.os_name
11442 if self.op.osparams:
11443 instance.osparams = self.os_inst
11444 for key, val in self.op.osparams.iteritems():
11445 result.append(("os/%s" % key, val))
11447 self.cfg.Update(instance, feedback_fn)
11451 _DISK_CONVERSIONS = {
11452 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11453 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
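# Exec() dispatches template conversions through this mapping, keyed by the
# (current, requested) template pair; e.g. a plain -> drbd request resolves
# to _ConvertPlainToDrbd via
#   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]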
11457 class LUInstanceChangeGroup(LogicalUnit):
11458 HPATH = "instance-change-group"
11459 HTYPE = constants.HTYPE_INSTANCE
11462 def ExpandNames(self):
11463 self.share_locks = _ShareAll()
11464 self.needed_locks = {
11465 locking.LEVEL_NODEGROUP: [],
11466 locking.LEVEL_NODE: [],
11469 self._ExpandAndLockInstance()
11471 if self.op.target_groups:
11472 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11473 self.op.target_groups)
11475 self.req_target_uuids = None
11477 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11479 def DeclareLocks(self, level):
11480 if level == locking.LEVEL_NODEGROUP:
11481 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11483 if self.req_target_uuids:
11484 lock_groups = set(self.req_target_uuids)
11486 # Lock all groups used by instance optimistically; this requires going
11487 # via the node before it's locked, requiring verification later on
11488 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11489 lock_groups.update(instance_groups)
11491 # No target groups, need to lock all of them
11492 lock_groups = locking.ALL_SET
11494 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11496 elif level == locking.LEVEL_NODE:
11497 if self.req_target_uuids:
11498 # Lock all nodes used by instances
11499 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11500 self._LockInstancesNodes()
11502 # Lock all nodes in all potential target groups
11503 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11504 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11505 member_nodes = [node_name
11506 for group in lock_groups
11507 for node_name in self.cfg.GetNodeGroup(group).members]
11508 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11510 # Lock all nodes as all groups are potential targets
11511 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11513 def CheckPrereq(self):
11514 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11515 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11516 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11518 assert (self.req_target_uuids is None or
11519 owned_groups.issuperset(self.req_target_uuids))
11520 assert owned_instances == set([self.op.instance_name])
11522 # Get instance information
11523 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11525 # Check if node groups for locked instance are still correct
11526 assert owned_nodes.issuperset(self.instance.all_nodes), \
11527 ("Instance %s's nodes changed while we kept the lock" %
11528 self.op.instance_name)
11530 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11533 if self.req_target_uuids:
11534 # User requested specific target groups
11535 self.target_uuids = self.req_target_uuids
11537 # All groups except those used by the instance are potential targets
11538 self.target_uuids = owned_groups - inst_groups
11540 conflicting_groups = self.target_uuids & inst_groups
11541 if conflicting_groups:
11542 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11543 " used by the instance '%s'" %
11544 (utils.CommaJoin(conflicting_groups),
11545 self.op.instance_name),
11546 errors.ECODE_INVAL)
11548 if not self.target_uuids:
11549 raise errors.OpPrereqError("There are no possible target groups",
11550 errors.ECODE_INVAL)
11552 def BuildHooksEnv(self):
11553 """Build hooks env.
11556 assert self.target_uuids
11559 "TARGET_GROUPS": " ".join(self.target_uuids),
11562 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11566 def BuildHooksNodes(self):
11567 """Build hooks nodes.
11570 mn = self.cfg.GetMasterNode()
11571 return ([mn], [mn])
11573 def Exec(self, feedback_fn):
11574 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11576 assert instances == [self.op.instance_name], "Instance not locked"
11578 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11579 instances=instances, target_groups=list(self.target_uuids))
11581 ial.Run(self.op.iallocator)
11583 if not ial.success:
11584 raise errors.OpPrereqError("Can't compute solution for changing group of"
11585 " instance '%s' using iallocator '%s': %s" %
11586 (self.op.instance_name, self.op.iallocator,
11588 errors.ECODE_NORES)
11590 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
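# "jobs" is a list of job definitions, each itself a list of opcodes built
# from the iallocator result; a hypothetical sketch (opcode choice made up
# for illustration) could look like:
#   [[opcodes.OpInstanceMigrate(instance_name="inst1")],
#    [opcodes.OpInstanceFailover(instance_name="inst2")]]
# Returning them wrapped in ResultWithJobs lets mcpu submit the jobs and
# report the resulting job IDs to the caller.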
11592 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11593 " instance '%s'", len(jobs), self.op.instance_name)
11595 return ResultWithJobs(jobs)
11598 class LUBackupQuery(NoHooksLU):
11599 """Query the exports list
11604 def ExpandNames(self):
11605 self.needed_locks = {}
11606 self.share_locks[locking.LEVEL_NODE] = 1
11607 if not self.op.nodes:
11608 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11610 self.needed_locks[locking.LEVEL_NODE] = \
11611 _GetWantedNodes(self, self.op.nodes)
11613 def Exec(self, feedback_fn):
11614 """Compute the list of all the exported system images.
11617 @return: a dictionary with the structure node->(export-list)
11618 where export-list is a list of the instances exported on
11619 that node.
11622 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11623 rpcresult = self.rpc.call_export_list(self.nodes)
11625 for node in rpcresult:
11626 if rpcresult[node].fail_msg:
11627 result[node] = False
11629 result[node] = rpcresult[node].payload
11634 class LUBackupPrepare(NoHooksLU):
11635 """Prepares an instance for an export and returns useful information.
11640 def ExpandNames(self):
11641 self._ExpandAndLockInstance()
11643 def CheckPrereq(self):
11644 """Check prerequisites.
11647 instance_name = self.op.instance_name
11649 self.instance = self.cfg.GetInstanceInfo(instance_name)
11650 assert self.instance is not None, \
11651 "Cannot retrieve locked instance %s" % self.op.instance_name
11652 _CheckNodeOnline(self, self.instance.primary_node)
11654 self._cds = _GetClusterDomainSecret()
11656 def Exec(self, feedback_fn):
11657 """Prepares an instance for an export.
11660 instance = self.instance
11662 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11663 salt = utils.GenerateSecret(8)
11665 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11666 result = self.rpc.call_x509_cert_create(instance.primary_node,
11667 constants.RIE_CERT_VALIDITY)
11668 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11670 (name, cert_pem) = result.payload
11672 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11676 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11677 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11679 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11685 class LUBackupExport(LogicalUnit):
11686 """Export an instance to an image in the cluster.
11689 HPATH = "instance-export"
11690 HTYPE = constants.HTYPE_INSTANCE
11693 def CheckArguments(self):
11694 """Check the arguments.
11697 self.x509_key_name = self.op.x509_key_name
11698 self.dest_x509_ca_pem = self.op.destination_x509_ca
11700 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11701 if not self.x509_key_name:
11702 raise errors.OpPrereqError("Missing X509 key name for encryption",
11703 errors.ECODE_INVAL)
11705 if not self.dest_x509_ca_pem:
11706 raise errors.OpPrereqError("Missing destination X509 CA",
11707 errors.ECODE_INVAL)
11709 def ExpandNames(self):
11710 self._ExpandAndLockInstance()
11712 # Lock all nodes for local exports
11713 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11714 # FIXME: lock only instance primary and destination node
11716 # Sad but true, for now we have to lock all nodes, as we don't know where
11717 # the previous export might be, and in this LU we search for it and
11718 # remove it from its current node. In the future we could fix this by:
11719 # - making a tasklet to search (share-lock all), then create the
11720 # new one, then one to remove, after
11721 # - removing the removal operation altogether
11722 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11724 def DeclareLocks(self, level):
11725 """Last minute lock declaration."""
11726 # All nodes are locked anyway, so nothing to do here.
11728 def BuildHooksEnv(self):
11729 """Build hooks env.
11731 This will run on the master, primary node and target node.
11735 "EXPORT_MODE": self.op.mode,
11736 "EXPORT_NODE": self.op.target_node,
11737 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11738 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11739 # TODO: Generic function for boolean env variables
11740 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11743 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11747 def BuildHooksNodes(self):
11748 """Build hooks nodes.
11751 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11753 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11754 nl.append(self.op.target_node)
11758 def CheckPrereq(self):
11759 """Check prerequisites.
11761 This checks that the instance and node names are valid.
11764 instance_name = self.op.instance_name
11766 self.instance = self.cfg.GetInstanceInfo(instance_name)
11767 assert self.instance is not None, \
11768 "Cannot retrieve locked instance %s" % self.op.instance_name
11769 _CheckNodeOnline(self, self.instance.primary_node)
11771 if (self.op.remove_instance and self.instance.admin_up and
11772 not self.op.shutdown):
11773 raise errors.OpPrereqError("Can not remove instance without shutting it"
11776 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11777 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11778 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11779 assert self.dst_node is not None
11781 _CheckNodeOnline(self, self.dst_node.name)
11782 _CheckNodeNotDrained(self, self.dst_node.name)
11785 self.dest_disk_info = None
11786 self.dest_x509_ca = None
11788 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11789 self.dst_node = None
11791 if len(self.op.target_node) != len(self.instance.disks):
11792 raise errors.OpPrereqError(("Received destination information for %s"
11793 " disks, but instance %s has %s disks") %
11794 (len(self.op.target_node), instance_name,
11795 len(self.instance.disks)),
11796 errors.ECODE_INVAL)
11798 cds = _GetClusterDomainSecret()
11800 # Check X509 key name
11802 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11803 except (TypeError, ValueError), err:
11804 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11806 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11807 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11808 errors.ECODE_INVAL)
11810 # Load and verify CA
11812 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11813 except OpenSSL.crypto.Error, err:
11814 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11815 (err, ), errors.ECODE_INVAL)
11817 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11818 if errcode is not None:
11819 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11820 (msg, ), errors.ECODE_INVAL)
11822 self.dest_x509_ca = cert
11824 # Verify target information
11826 for idx, disk_data in enumerate(self.op.target_node):
11828 (host, port, magic) = \
11829 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11830 except errors.GenericError, err:
11831 raise errors.OpPrereqError("Target info for disk %s: %s" %
11832 (idx, err), errors.ECODE_INVAL)
11834 disk_info.append((host, port, magic))
11836 assert len(disk_info) == len(self.op.target_node)
11837 self.dest_disk_info = disk_info
11840 raise errors.ProgrammerError("Unhandled export mode %r" %
11843 # instance disk type verification
11844 # TODO: Implement export support for file-based disks
11845 for disk in self.instance.disks:
11846 if disk.dev_type == constants.LD_FILE:
11847 raise errors.OpPrereqError("Export not supported for instances with"
11848 " file-based disks", errors.ECODE_INVAL)
11850 def _CleanupExports(self, feedback_fn):
11851 """Removes exports of current instance from all other nodes.
11853 If an instance in a cluster with nodes A..D was exported to node C, its
11854 exports will be removed from the nodes A, B and D.
11857 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11859 nodelist = self.cfg.GetNodeList()
11860 nodelist.remove(self.dst_node.name)
11862 # on one-node clusters nodelist will be empty after the removal
11863 # if we proceed the backup would be removed because OpBackupQuery
11864 # substitutes an empty list with the full cluster node list.
11865 iname = self.instance.name
11867 feedback_fn("Removing old exports for instance %s" % iname)
11868 exportlist = self.rpc.call_export_list(nodelist)
11869 for node in exportlist:
11870 if exportlist[node].fail_msg:
11872 if iname in exportlist[node].payload:
11873 msg = self.rpc.call_export_remove(node, iname).fail_msg
11875 self.LogWarning("Could not remove older export for instance %s"
11876 " on node %s: %s", iname, node, msg)
11878 def Exec(self, feedback_fn):
11879 """Export an instance to an image in the cluster.
11882 assert self.op.mode in constants.EXPORT_MODES
11884 instance = self.instance
11885 src_node = instance.primary_node
11887 if self.op.shutdown:
11888 # shutdown the instance, but not the disks
11889 feedback_fn("Shutting down instance %s" % instance.name)
11890 result = self.rpc.call_instance_shutdown(src_node, instance,
11891 self.op.shutdown_timeout)
11892 # TODO: Maybe ignore failures if ignore_remove_failures is set
11893 result.Raise("Could not shutdown instance %s on"
11894 " node %s" % (instance.name, src_node))
11896 # set the disks ID correctly since call_instance_start needs the
11897 # correct drbd minor to create the symlinks
11898 for disk in instance.disks:
11899 self.cfg.SetDiskID(disk, src_node)
11901 activate_disks = (not instance.admin_up)
11904 # Activate the instance disks if we're exporting a stopped instance
11905 feedback_fn("Activating disks for %s" % instance.name)
11906 _StartInstanceDisks(self, instance, None)
11909 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11912 helper.CreateSnapshots()
11914 if (self.op.shutdown and instance.admin_up and
11915 not self.op.remove_instance):
11916 assert not activate_disks
11917 feedback_fn("Starting instance %s" % instance.name)
11918 result = self.rpc.call_instance_start(src_node,
11919 (instance, None, None), False)
11920 msg = result.fail_msg
11922 feedback_fn("Failed to start instance: %s" % msg)
11923 _ShutdownInstanceDisks(self, instance)
11924 raise errors.OpExecError("Could not start instance: %s" % msg)
11926 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11927 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11928 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11929 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11930 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11932 (key_name, _, _) = self.x509_key_name
11935 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11938 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11939 key_name, dest_ca_pem,
11944 # Check for backwards compatibility
11945 assert len(dresults) == len(instance.disks)
11946 assert compat.all(isinstance(i, bool) for i in dresults), \
11947 "Not all results are boolean: %r" % dresults
11951 feedback_fn("Deactivating disks for %s" % instance.name)
11952 _ShutdownInstanceDisks(self, instance)
11954 if not (compat.all(dresults) and fin_resu):
11957 failures.append("export finalization")
11958 if not compat.all(dresults):
11959 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11961 failures.append("disk export: disk(s) %s" % fdsk)
11963 raise errors.OpExecError("Export failed, errors in %s" %
11964 utils.CommaJoin(failures))
11966 # At this point, the export was successful, we can cleanup/finish
11968 # Remove instance if requested
11969 if self.op.remove_instance:
11970 feedback_fn("Removing instance %s" % instance.name)
11971 _RemoveInstance(self, feedback_fn, instance,
11972 self.op.ignore_remove_failures)
11974 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11975 self._CleanupExports(feedback_fn)
11977 return fin_resu, dresults
11980 class LUBackupRemove(NoHooksLU):
11981 """Remove exports related to the named instance.
11986 def ExpandNames(self):
11987 self.needed_locks = {}
11988 # We need all nodes to be locked in order for RemoveExport to work, but we
11989 # don't need to lock the instance itself, as nothing will happen to it (and
11990 # we can remove exports also for a removed instance)
11991 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11993 def Exec(self, feedback_fn):
11994 """Remove any export.
11997 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11998 # If the instance was not found we'll try with the name that was passed in.
11999 # This will only work if it was an FQDN, though.
12001 if not instance_name:
12003 instance_name = self.op.instance_name
12005 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12006 exportlist = self.rpc.call_export_list(locked_nodes)
12008 for node in exportlist:
12009 msg = exportlist[node].fail_msg
12011 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12013 if instance_name in exportlist[node].payload:
12015 result = self.rpc.call_export_remove(node, instance_name)
12016 msg = result.fail_msg
12018 logging.error("Could not remove export for instance %s"
12019 " on node %s: %s", instance_name, node, msg)
12021 if fqdn_warn and not found:
12022 feedback_fn("Export not found. If trying to remove an export belonging"
12023 " to a deleted instance please use its Fully Qualified"
12024 " Domain Name.")
12027 class LUGroupAdd(LogicalUnit):
12028 """Logical unit for creating node groups.
12031 HPATH = "group-add"
12032 HTYPE = constants.HTYPE_GROUP
12035 def ExpandNames(self):
12036 # We need the new group's UUID here so that we can create and acquire the
12037 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12038 # that it should not check whether the UUID exists in the configuration.
12039 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12040 self.needed_locks = {}
12041 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12043 def CheckPrereq(self):
12044 """Check prerequisites.
12046 This checks that the given group name is not an existing node group
12051 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12052 except errors.OpPrereqError:
12055 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12056 " node group (UUID: %s)" %
12057 (self.op.group_name, existing_uuid),
12058 errors.ECODE_EXISTS)
12060 if self.op.ndparams:
12061 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12063 def BuildHooksEnv(self):
12064 """Build hooks env.
12068 "GROUP_NAME": self.op.group_name,
12071 def BuildHooksNodes(self):
12072 """Build hooks nodes.
12075 mn = self.cfg.GetMasterNode()
12076 return ([mn], [mn])
12078 def Exec(self, feedback_fn):
12079 """Add the node group to the cluster.
12082 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12083 uuid=self.group_uuid,
12084 alloc_policy=self.op.alloc_policy,
12085 ndparams=self.op.ndparams)
12087 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12088 del self.remove_locks[locking.LEVEL_NODEGROUP]
12091 class LUGroupAssignNodes(NoHooksLU):
12092 """Logical unit for assigning nodes to groups.
12097 def ExpandNames(self):
12098 # These raise errors.OpPrereqError on their own:
12099 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12100 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12102 # We want to lock all the affected nodes and groups. We have readily
12103 # available the list of nodes, and the *destination* group. To gather the
12104 # list of "source" groups, we need to fetch node information later on.
12105 self.needed_locks = {
12106 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12107 locking.LEVEL_NODE: self.op.nodes,
12110 def DeclareLocks(self, level):
12111 if level == locking.LEVEL_NODEGROUP:
12112 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12114 # Try to get all affected nodes' groups without having the group or node
12115 # lock yet. Needs verification later in the code flow.
12116 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12118 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12120 def CheckPrereq(self):
12121 """Check prerequisites.
12124 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12125 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12126 frozenset(self.op.nodes))
12128 expected_locks = (set([self.group_uuid]) |
12129 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12130 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12131 if actual_locks != expected_locks:
12132 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12133 " current groups are '%s', used to be '%s'" %
12134 (utils.CommaJoin(expected_locks),
12135 utils.CommaJoin(actual_locks)))
12137 self.node_data = self.cfg.GetAllNodesInfo()
12138 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12139 instance_data = self.cfg.GetAllInstancesInfo()
12141 if self.group is None:
12142 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12143 (self.op.group_name, self.group_uuid))
12145 (new_splits, previous_splits) = \
12146 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12147 for node in self.op.nodes],
12148 self.node_data, instance_data)
12151 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12153 if not self.op.force:
12154 raise errors.OpExecError("The following instances get split by this"
12155 " change and --force was not given: %s" %
12158 self.LogWarning("This operation will split the following instances: %s",
12161 if previous_splits:
12162 self.LogWarning("In addition, these already-split instances continue"
12163 " to be split across groups: %s",
12164 utils.CommaJoin(utils.NiceSort(previous_splits)))
12166 def Exec(self, feedback_fn):
12167 """Assign nodes to a new group.
12170 for node in self.op.nodes:
12171 self.node_data[node].group = self.group_uuid
12173 # FIXME: Depends on side-effects of modifying the result of
12174 # C{cfg.GetAllNodesInfo}
12176 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12178 @staticmethod
12179 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12180 """Check for split instances after a node assignment.
12182 This method considers a series of node assignments as an atomic operation,
12183 and returns information about split instances after applying the set of
12186 In particular, it returns information about newly split instances, and
12187 instances that were already split, and remain so after the change.
12189 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12190 considered.
12192 @type changes: list of (node_name, new_group_uuid) pairs.
12193 @param changes: list of node assignments to consider.
12194 @param node_data: a dict with data for all nodes
12195 @param instance_data: a dict with all instances to consider
12196 @rtype: a two-tuple
12197 @return: a list of instances that were previously okay and become split as a
12198 consequence of this change, and a list of instances that were previously
12199 split and which this change does not fix.
12202 changed_nodes = dict((node, group) for node, group in changes
12203 if node_data[node].group != group)
12205 all_split_instances = set()
12206 previously_split_instances = set()
12208 def InstanceNodes(instance):
12209 return [instance.primary_node] + list(instance.secondary_nodes)
12211 for inst in instance_data.values():
12212 if inst.disk_template not in constants.DTS_INT_MIRROR:
12215 instance_nodes = InstanceNodes(inst)
12217 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12218 previously_split_instances.add(inst.name)
12220 if len(set(changed_nodes.get(node, node_data[node].group)
12221 for node in instance_nodes)) > 1:
12222 all_split_instances.add(inst.name)
12224 return (list(all_split_instances - previously_split_instances),
12225 list(previously_split_instances & all_split_instances))
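# Minimal usage sketch (hypothetical data): if a DRBD instance spans
# "node1"/"node2" and only "node2" is moved to group B, the instance becomes
# newly split and appears in the first element of the returned tuple:
#   changes = [("node2", "uuid-of-group-B")]
#   (new_splits, old_splits) = CheckAssignmentForSplitInstances(
#       changes, node_data, instance_data)  # node/instance data from config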
12228 class _GroupQuery(_QueryBase):
12229 FIELDS = query.GROUP_FIELDS
12231 def ExpandNames(self, lu):
12232 lu.needed_locks = {}
12234 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12235 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12238 self.wanted = [name_to_uuid[name]
12239 for name in utils.NiceSort(name_to_uuid.keys())]
12241 # Accept names to be either names or UUIDs.
12244 all_uuid = frozenset(self._all_groups.keys())
12246 for name in self.names:
12247 if name in all_uuid:
12248 self.wanted.append(name)
12249 elif name in name_to_uuid:
12250 self.wanted.append(name_to_uuid[name])
12252 missing.append(name)
12255 raise errors.OpPrereqError("Some groups do not exist: %s" %
12256 utils.CommaJoin(missing),
12257 errors.ECODE_NOENT)
12259 def DeclareLocks(self, lu, level):
12262 def _GetQueryData(self, lu):
12263 """Computes the list of node groups and their attributes.
12266 do_nodes = query.GQ_NODE in self.requested_data
12267 do_instances = query.GQ_INST in self.requested_data
12269 group_to_nodes = None
12270 group_to_instances = None
12272 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12273 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12274 # latter GetAllInstancesInfo() is not enough, for we have to go through
12275 # instance->node. Hence, we will need to process nodes even if we only need
12276 # instance information.
12277 if do_nodes or do_instances:
12278 all_nodes = lu.cfg.GetAllNodesInfo()
12279 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12282 for node in all_nodes.values():
12283 if node.group in group_to_nodes:
12284 group_to_nodes[node.group].append(node.name)
12285 node_to_group[node.name] = node.group
12288 all_instances = lu.cfg.GetAllInstancesInfo()
12289 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12291 for instance in all_instances.values():
12292 node = instance.primary_node
12293 if node in node_to_group:
12294 group_to_instances[node_to_group[node]].append(instance.name)
12297 # Do not pass on node information if it was not requested.
12298 group_to_nodes = None
12300 return query.GroupQueryData([self._all_groups[uuid]
12301 for uuid in self.wanted],
12302 group_to_nodes, group_to_instances)
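# Shape of the intermediate mappings (hypothetical cluster): with nodes
# "node1"/"node2" in group "g1" and an instance "inst1" whose primary node is
# "node1", the code above builds roughly
#   group_to_nodes     = {"g1-uuid": ["node1", "node2"]}
#   group_to_instances = {"g1-uuid": ["inst1"]}
# before handing both to query.GroupQueryData.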
12305 class LUGroupQuery(NoHooksLU):
12306 """Logical unit for querying node groups.
12311 def CheckArguments(self):
12312 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12313 self.op.output_fields, False)
12315 def ExpandNames(self):
12316 self.gq.ExpandNames(self)
12318 def DeclareLocks(self, level):
12319 self.gq.DeclareLocks(self, level)
12321 def Exec(self, feedback_fn):
12322 return self.gq.OldStyleQuery(self)
12325 class LUGroupSetParams(LogicalUnit):
12326 """Modifies the parameters of a node group.
12329 HPATH = "group-modify"
12330 HTYPE = constants.HTYPE_GROUP
12333 def CheckArguments(self):
12336 self.op.alloc_policy,
12339 if all_changes.count(None) == len(all_changes):
12340 raise errors.OpPrereqError("Please pass at least one modification",
12341 errors.ECODE_INVAL)
12343 def ExpandNames(self):
12344 # This raises errors.OpPrereqError on its own:
12345 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12347 self.needed_locks = {
12348 locking.LEVEL_NODEGROUP: [self.group_uuid],
12351 def CheckPrereq(self):
12352 """Check prerequisites.
12355 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12357 if self.group is None:
12358 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12359 (self.op.group_name, self.group_uuid))
12361 if self.op.ndparams:
12362 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12363 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12364 self.new_ndparams = new_ndparams
12366 def BuildHooksEnv(self):
12367 """Build hooks env.
12371 "GROUP_NAME": self.op.group_name,
12372 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12375 def BuildHooksNodes(self):
12376 """Build hooks nodes.
12379 mn = self.cfg.GetMasterNode()
12380 return ([mn], [mn])
12382 def Exec(self, feedback_fn):
12383 """Modifies the node group.
12388 if self.op.ndparams:
12389 self.group.ndparams = self.new_ndparams
12390 result.append(("ndparams", str(self.group.ndparams)))
12392 if self.op.alloc_policy:
12393 self.group.alloc_policy = self.op.alloc_policy
12395 self.cfg.Update(self.group, feedback_fn)
12399 class LUGroupRemove(LogicalUnit):
12400 HPATH = "group-remove"
12401 HTYPE = constants.HTYPE_GROUP
12404 def ExpandNames(self):
12405 # This raises errors.OpPrereqError on its own:
12406 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12407 self.needed_locks = {
12408 locking.LEVEL_NODEGROUP: [self.group_uuid],
12411 def CheckPrereq(self):
12412 """Check prerequisites.
12414 This checks that the given group name exists as a node group, that it is
12415 empty (i.e., contains no nodes), and that it is not the last group of the
12416 cluster.
12419 # Verify that the group is empty.
12420 group_nodes = [node.name
12421 for node in self.cfg.GetAllNodesInfo().values()
12422 if node.group == self.group_uuid]
12425 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12426 " nodes: %s" %
12427 (self.op.group_name,
12428 utils.CommaJoin(utils.NiceSort(group_nodes))),
12429 errors.ECODE_STATE)
12431 # Verify the cluster would not be left group-less.
12432 if len(self.cfg.GetNodeGroupList()) == 1:
12433 raise errors.OpPrereqError("Group '%s' is the only group,"
12434 " cannot be removed" %
12435 self.op.group_name,
12436 errors.ECODE_STATE)
12438 def BuildHooksEnv(self):
12439 """Build hooks env.
12443 "GROUP_NAME": self.op.group_name,
12446 def BuildHooksNodes(self):
12447 """Build hooks nodes.
12450 mn = self.cfg.GetMasterNode()
12451 return ([mn], [mn])
12453 def Exec(self, feedback_fn):
12454 """Remove the node group.
12458 self.cfg.RemoveNodeGroup(self.group_uuid)
12459 except errors.ConfigurationError:
12460 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12461 (self.op.group_name, self.group_uuid))
12463 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12466 class LUGroupRename(LogicalUnit):
12467 HPATH = "group-rename"
12468 HTYPE = constants.HTYPE_GROUP
12471 def ExpandNames(self):
12472 # This raises errors.OpPrereqError on its own:
12473 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12475 self.needed_locks = {
12476 locking.LEVEL_NODEGROUP: [self.group_uuid],
12479 def CheckPrereq(self):
12480 """Check prerequisites.
12482 Ensures requested new name is not yet used.
12486 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12487 except errors.OpPrereqError:
12490 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12491 " node group (UUID: %s)" %
12492 (self.op.new_name, new_name_uuid),
12493 errors.ECODE_EXISTS)
12495 def BuildHooksEnv(self):
12496 """Build hooks env.
12500 "OLD_NAME": self.op.group_name,
12501 "NEW_NAME": self.op.new_name,
12504 def BuildHooksNodes(self):
12505 """Build hooks nodes.
12508 mn = self.cfg.GetMasterNode()
12510 all_nodes = self.cfg.GetAllNodesInfo()
12511 all_nodes.pop(mn, None)
12514 run_nodes.extend(node.name for node in all_nodes.values()
12515 if node.group == self.group_uuid)
12517 return (run_nodes, run_nodes)
12519 def Exec(self, feedback_fn):
12520 """Rename the node group.
12523 group = self.cfg.GetNodeGroup(self.group_uuid)
12526 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12527 (self.op.group_name, self.group_uuid))
12529 group.name = self.op.new_name
12530 self.cfg.Update(group, feedback_fn)
12532 return self.op.new_name
12535 class LUGroupEvacuate(LogicalUnit):
12536 HPATH = "group-evacuate"
12537 HTYPE = constants.HTYPE_GROUP
12540 def ExpandNames(self):
12541 # This raises errors.OpPrereqError on its own:
12542 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12544 if self.op.target_groups:
12545 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12546 self.op.target_groups)
12548 self.req_target_uuids = []
12550 if self.group_uuid in self.req_target_uuids:
12551 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12552 " as a target group (targets are %s)" %
12554 utils.CommaJoin(self.req_target_uuids)),
12555 errors.ECODE_INVAL)
12557 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12559 self.share_locks = _ShareAll()
12560 self.needed_locks = {
12561 locking.LEVEL_INSTANCE: [],
12562 locking.LEVEL_NODEGROUP: [],
12563 locking.LEVEL_NODE: [],
12566 def DeclareLocks(self, level):
12567 if level == locking.LEVEL_INSTANCE:
12568 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12570 # Lock instances optimistically, needs verification once node and group
12571 # locks have been acquired
12572 self.needed_locks[locking.LEVEL_INSTANCE] = \
12573 self.cfg.GetNodeGroupInstances(self.group_uuid)
12575 elif level == locking.LEVEL_NODEGROUP:
12576 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12578 if self.req_target_uuids:
12579 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12581 # Lock all groups used by instances optimistically; this requires going
12582 # via the node before it's locked, requiring verification later on
12583 lock_groups.update(group_uuid
12584 for instance_name in
12585 self.owned_locks(locking.LEVEL_INSTANCE)
12587 self.cfg.GetInstanceNodeGroups(instance_name))
12589 # No target groups, need to lock all of them
12590 lock_groups = locking.ALL_SET
12592 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12594 elif level == locking.LEVEL_NODE:
12595 # This will only lock the nodes in the group to be evacuated which
12596 # contain actual instances
12597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12598 self._LockInstancesNodes()
12600 # Lock all nodes in group to be evacuated and target groups
12601 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12602 assert self.group_uuid in owned_groups
12603 member_nodes = [node_name
12604 for group in owned_groups
12605 for node_name in self.cfg.GetNodeGroup(group).members]
12606 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12608 def CheckPrereq(self):
12609 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12610 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12611 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12613 assert owned_groups.issuperset(self.req_target_uuids)
12614 assert self.group_uuid in owned_groups
12616 # Check if locked instances are still correct
12617 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12619 # Get instance information
12620 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12622 # Check if node groups for locked instances are still correct
12623 for instance_name in owned_instances:
12624 inst = self.instances[instance_name]
12625 assert owned_nodes.issuperset(inst.all_nodes), \
12626 "Instance %s's nodes changed while we kept the lock" % instance_name
12628 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12631 assert self.group_uuid in inst_groups, \
12632 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12634 if self.req_target_uuids:
12635 # User requested specific target groups
12636 self.target_uuids = self.req_target_uuids
12638 # All groups except the one to be evacuated are potential targets
12639 self.target_uuids = [group_uuid for group_uuid in owned_groups
12640 if group_uuid != self.group_uuid]
12642 if not self.target_uuids:
12643 raise errors.OpPrereqError("There are no possible target groups",
12644 errors.ECODE_INVAL)
12646 def BuildHooksEnv(self):
12647 """Build hooks env.
12651 "GROUP_NAME": self.op.group_name,
12652 "TARGET_GROUPS": " ".join(self.target_uuids),
12655 def BuildHooksNodes(self):
12656 """Build hooks nodes.
12659 mn = self.cfg.GetMasterNode()
12661 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12663 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12665 return (run_nodes, run_nodes)
12667 def Exec(self, feedback_fn):
12668 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12670 assert self.group_uuid not in self.target_uuids
12672 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12673 instances=instances, target_groups=self.target_uuids)
12675 ial.Run(self.op.iallocator)
12677 if not ial.success:
12678 raise errors.OpPrereqError("Can't compute group evacuation using"
12679 " iallocator '%s': %s" %
12680 (self.op.iallocator, ial.info),
12681 errors.ECODE_NORES)
12683 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12685 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12686 len(jobs), self.op.group_name)
12688 return ResultWithJobs(jobs)
12691 class TagsLU(NoHooksLU): # pylint: disable=W0223
12692 """Generic tags LU.
12694 This is an abstract class which is the parent of all the other tags LUs.
12697 def ExpandNames(self):
12698 self.group_uuid = None
12699 self.needed_locks = {}
12700 if self.op.kind == constants.TAG_NODE:
12701 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12702 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12703 elif self.op.kind == constants.TAG_INSTANCE:
12704 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12705 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12706 elif self.op.kind == constants.TAG_NODEGROUP:
12707 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12709 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12710 # not possible to acquire the BGL based on opcode parameters)
12712 def CheckPrereq(self):
12713 """Check prerequisites.
12716 if self.op.kind == constants.TAG_CLUSTER:
12717 self.target = self.cfg.GetClusterInfo()
12718 elif self.op.kind == constants.TAG_NODE:
12719 self.target = self.cfg.GetNodeInfo(self.op.name)
12720 elif self.op.kind == constants.TAG_INSTANCE:
12721 self.target = self.cfg.GetInstanceInfo(self.op.name)
12722 elif self.op.kind == constants.TAG_NODEGROUP:
12723 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12725 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12726 str(self.op.kind), errors.ECODE_INVAL)
12729 class LUTagsGet(TagsLU):
12730 """Returns the tags of a given object.
12735 def ExpandNames(self):
12736 TagsLU.ExpandNames(self)
12738 # Share locks as this is only a read operation
12739 self.share_locks = _ShareAll()
12741 def Exec(self, feedback_fn):
12742 """Returns the tag list.
12745 return list(self.target.GetTags())
12748 class LUTagsSearch(NoHooksLU):
12749 """Searches the tags for a given pattern.
12754 def ExpandNames(self):
12755 self.needed_locks = {}
12757 def CheckPrereq(self):
12758 """Check prerequisites.
12760 This checks the pattern passed for validity by compiling it.
12764 self.re = re.compile(self.op.pattern)
12765 except re.error, err:
12766 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12767 (self.op.pattern, err), errors.ECODE_INVAL)
12769 def Exec(self, feedback_fn):
12770 """Returns the tag list.
12774 tgts = [("/cluster", cfg.GetClusterInfo())]
12775 ilist = cfg.GetAllInstancesInfo().values()
12776 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12777 nlist = cfg.GetAllNodesInfo().values()
12778 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12779 tgts.extend(("/nodegroup/%s" % n.name, n)
12780 for n in cfg.GetAllNodeGroupsInfo().values())
12782 for path, target in tgts:
12783 for tag in target.GetTags():
12784 if self.re.search(tag):
12785 results.append((path, tag))
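# Each match is collected as a (path, tag) pair, e.g. (hypothetical)
# ("/instances/inst1", "env:production") when the pattern matches that tag.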
12789 class LUTagsSet(TagsLU):
12790 """Sets a tag on a given object.
12795 def CheckPrereq(self):
12796 """Check prerequisites.
12798 This checks the type and length of the tag name and value.
12801 TagsLU.CheckPrereq(self)
12802 for tag in self.op.tags:
12803 objects.TaggableObject.ValidateTag(tag)
12805 def Exec(self, feedback_fn):
12810 for tag in self.op.tags:
12811 self.target.AddTag(tag)
12812 except errors.TagError, err:
12813 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12814 self.cfg.Update(self.target, feedback_fn)
12817 class LUTagsDel(TagsLU):
12818 """Delete a list of tags from a given object.
12823 def CheckPrereq(self):
12824 """Check prerequisites.
12826 This checks that we have the given tag.
12829 TagsLU.CheckPrereq(self)
12830 for tag in self.op.tags:
12831 objects.TaggableObject.ValidateTag(tag)
12832 del_tags = frozenset(self.op.tags)
12833 cur_tags = self.target.GetTags()
12835 diff_tags = del_tags - cur_tags
12837 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12838 raise errors.OpPrereqError("Tag(s) %s not found" %
12839 (utils.CommaJoin(diff_names), ),
12840 errors.ECODE_NOENT)
12842 def Exec(self, feedback_fn):
12843 """Remove the tag from the object.
12846 for tag in self.op.tags:
12847 self.target.RemoveTag(tag)
12848 self.cfg.Update(self.target, feedback_fn)
12851 class LUTestDelay(NoHooksLU):
12852 """Sleep for a specified amount of time.
12854 This LU sleeps on the master and/or nodes for a specified amount of
12855 time.
12860 def ExpandNames(self):
12861 """Expand names and set required locks.
12863 This expands the node list, if any.
12866 self.needed_locks = {}
12867 if self.op.on_nodes:
12868 # _GetWantedNodes can be used here, but is not always appropriate to use
12869 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12870 # more information.
12871 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12872 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12874 def _TestDelay(self):
12875 """Do the actual sleep.
12878 if self.op.on_master:
12879 if not utils.TestDelay(self.op.duration):
12880 raise errors.OpExecError("Error during master delay test")
12881 if self.op.on_nodes:
12882 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12883 for node, node_result in result.items():
12884 node_result.Raise("Failure during rpc call to node %s" % node)
12886 def Exec(self, feedback_fn):
12887 """Execute the test delay opcode, with the wanted repetitions.
12890 if self.op.repeat == 0:
12893 top_value = self.op.repeat - 1
12894 for i in range(self.op.repeat):
12895 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12899 class LUTestJqueue(NoHooksLU):
12900 """Utility LU to test some aspects of the job queue.
12905 # Must be lower than default timeout for WaitForJobChange to see whether it
12906 # notices changed jobs
12907 _CLIENT_CONNECT_TIMEOUT = 20.0
12908 _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")
      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)
        # Send details to client
        cb(tmpsock)
        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
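  # Editor's illustrative sketch (not part of the original module): the peer
  # of _NotifyUsingSocket is a test client that receives the socket path via
  # the job feedback mechanism, connects to it and then closes the connection
  # to confirm the notification.  A minimal client could look like this
  # (hypothetical helper, standard library only):
  @staticmethod
  def _ExampleNotificationClient(sockname):
    """Connects to the notification socket and confirms by closing it."""
    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      client.connect(sockname)
      # Closing the connection is what the server side waits for
    finally:
      client.close()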
12960 def _SendNotification(self, test, arg, sockname):
12961 """Sends a notification to the client.
12964 @param test: Test name
12965 @param arg: Test argument (depends on test)
12966 @type sockname: string
12967 @param sockname: Socket path
12970 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12972 def _Notify(self, prereq, test, arg):
12973 """Notifies the client of a test.
12976 @param prereq: Whether this is a prereq-phase test
12978 @param test: Test name
12979 @param arg: Test argument (depends on test)
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                   test, arg),
                                   errcls)
12991 def CheckArguments(self):
12992 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12993 self.expandnames_calls = 0
12995 def ExpandNames(self):
12996 checkargs_calls = getattr(self, "checkargs_calls", 0)
12997 if checkargs_calls < 1:
12998 raise errors.ProgrammerError("CheckArguments was not called")
13000 self.expandnames_calls += 1
13002 if self.op.notify_waitlock:
13003 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13005 self.LogInfo("Expanding names")
13007 # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
13012 def Exec(self, feedback_fn):
13013 if self.expandnames_calls < 1:
13014 raise errors.ProgrammerError("ExpandNames was not called")
13016 if self.op.notify_exec:
13017 self._Notify(False, constants.JQT_EXEC, None)
13019 self.LogInfo("Executing")
13021 if self.op.log_messages:
13022 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13023 for idx, msg in enumerate(self.op.log_messages):
13024 self.LogInfo("Sending log message %s", idx + 1)
13025 feedback_fn(constants.JQT_MSGPREFIX + msg)
13026 # Report how many test messages have been sent
13027 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
13035 class IAllocator(object):
13036 """IAllocator framework.
  An IAllocator instance has four sets of attributes:
13039 - cfg that is needed to query the cluster
13040 - input data (all members of the _KEYS class attribute are required)
13041 - four buffer attributes (in|out_data|text), that represent the
13042 input (to the external script) in text and data structure format,
13043 and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
13048 # pylint: disable=R0902
13049 # lots of instance attributes
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
13064 self.evac_mode = None
13065 self.target_groups = []
13067 self.required_nodes = None
13068 # init result fields
13069 self.success = self.info = self.result = None
    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)
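  # Editor's illustrative sketch (not part of the original module): the
  # keyword arguments accepted by __init__ depend on the mode and are
  # validated against the keydata declared in _MODE_DATA.  Nothing calls this
  # helper; the instance and node names are hypothetical.
  @classmethod
  def _ExampleRelocationRequest(cls, cfg, rpc_runner):
    """Sketch of how a relocation-mode IAllocator would be constructed.

    Note that building the instance immediately gathers cluster data (via
    RPC) and serializes the request text.

    """
    return cls(cfg, rpc_runner,
               mode=constants.IALLOCATOR_MODE_RELOC,
               name="web1.example.com",
               relocate_from=["node2.example.com"])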
13091 def _ComputeClusterData(self):
13092 """Compute the generic allocator input data.
13094 This is the data that is independent of the actual operation.
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
13108 iinfo = cfg.GetAllInstancesInfo().values()
13109 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13112 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13114 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13115 hypervisor_name = self.hypervisor
13116 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13117 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
13127 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13129 config_ndata = self._ComputeBasicNodeData(ninfo)
13130 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13131 i_list, config_ndata)
13132 assert len(data["nodes"]) == len(ninfo), \
13133 "Incomplete node data computed"
13135 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13137 self.in_data = data
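  # Editor's illustrative sketch (not part of the original module): after
  # _ComputeClusterData and _BuildInputData have run, self.in_data is a plain
  # dict that gets serialized for the external script.  Its top-level shape
  # is roughly the following (heavily abridged, all values hypothetical):
  _EXAMPLE_IN_DATA_SKETCH = {
    "version": 2,
    "cluster_name": "cluster.example.com",
    "cluster_tags": [],
    "enabled_hypervisors": ["xen-pvm"],
    "nodegroups": {"uuid-1": {"name": "default",
                              "alloc_policy": "preferred"}},
    "nodes": {"node1.example.com": {"total_memory": 4096,
                                    "free_memory": 2048}},
    "instances": {"web1.example.com": {"memory": 512, "vcpus": 1}},
    "request": {"type": "allocate"},
    }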
  @staticmethod
  def _ComputeNodeGroupData(cfg):
13141 """Compute node groups data.
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
13154 """Compute global node data.
13157 @returns: a dict of name: (node dict, node config)
13160 # fill in static (config-based) values
13161 node_results = dict((ninfo.name, {
13162 "tags": list(ninfo.GetTags()),
13163 "primary_ip": ninfo.primary_ip,
13164 "secondary_ip": ninfo.secondary_ip,
13165 "offline": ninfo.offline,
13166 "drained": ninfo.drained,
13167 "master_candidate": ninfo.master_candidate,
13168 "group": ninfo.group,
13169 "master_capable": ninfo.master_capable,
13170 "vm_capable": ninfo.vm_capable,
13172 for ninfo in node_cfg.values())
13174 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
13179 """Compute global node data.
13181 @param node_results: the basic node structures as filled from the config
13184 # make a copy of the current dict
13185 node_results = dict(node_results)
13186 for nname, nresult in node_data.items():
13187 assert nname in node_results, "Missing basic data for node %s" % nname
13188 ninfo = node_cfg[nname]
13190 if not (ninfo.offline or ninfo.drained):
13191 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
13194 remote_info = nresult.payload
13196 for attr in ["memory_total", "memory_free", "memory_dom0",
13197 "vg_size", "vg_free", "cpu_total"]:
13198 if attr not in remote_info:
13199 raise errors.OpExecError("Node '%s' didn't return attribute"
13200 " '%s'" % (nname, attr))
13201 if not isinstance(remote_info[attr], int):
          raise errors.OpExecError("Node '%s' returned invalid value"
                                   " for '%s': %s" %
                                   (nname, attr, remote_info[attr]))
13205 # compute memory used by primary instances
13206 i_p_mem = i_p_up_mem = 0
13207 for iinfo, beinfo in i_list:
13208 if iinfo.primary_node == nname:
13209 i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]
        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn
13234 return node_results
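  # Editor's illustrative sketch (not part of the original module): the
  # memory_free correction above reserves the full configured backend memory
  # for each primary instance, even when the hypervisor currently reports a
  # lower usage.  A hypothetical worked example as a standalone helper:
  @staticmethod
  def _ExampleCorrectFreeMemory(reported_free, be_memory, actually_used):
    """Returns free memory adjusted for an under-committed instance.

    E.g. reported_free=2048, be_memory=512, actually_used=300 gives 1836,
    i.e. the remaining 212 MiB of the instance's configured memory are no
    longer counted as free.

    """
    return reported_free - max(0, be_memory - actually_used)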
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
13238 """Compute global instance data.
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
13256 "tags": list(iinfo.GetTags()),
13257 "admin_up": iinfo.admin_up,
13258 "vcpus": beinfo[constants.BE_VCPUS],
13259 "memory": beinfo[constants.BE_MEMORY],
13261 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13263 "disks": [{constants.IDISK_SIZE: dsk.size,
13264 constants.IDISK_MODE: dsk.mode}
13265 for dsk in iinfo.disks],
13266 "disk_template": iinfo.disk_template,
13267 "hypervisor": iinfo.hypervisor,
13269 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13271 instance_data[iinfo.name] = pir
13273 return instance_data
13275 def _AddNewInstance(self):
13276 """Add new instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.
13285 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
13294 "disk_template": self.disk_template,
13297 "vcpus": self.vcpus,
13298 "memory": self.memory,
13299 "disks": self.disks,
13300 "disk_space_total": disk_space,
13302 "required_nodes": self.required_nodes,
13303 "hypervisor": self.hypervisor,
13308 def _AddRelocateInstance(self):
13309 """Add relocate instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.
13318 instance = self.cfg.GetInstanceInfo(self.name)
13319 if instance is None:
13320 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13321 " IAllocator" % self.name)
13323 if instance.disk_template not in constants.DTS_MIRRORED:
13324 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13325 errors.ECODE_INVAL)
13327 if instance.disk_template in constants.DTS_INT_MIRROR and \
13328 len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)
13332 self.required_nodes = 1
13333 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
13344 def _AddNodeEvacuate(self):
13345 """Get data for node-evacuate requests.
13349 "instances": self.instances,
13350 "evac_mode": self.evac_mode,
13353 def _AddChangeGroup(self):
13354 """Get data for node-evacuate requests.
13358 "instances": self.instances,
13359 "target_groups": self.target_groups,
13362 def _BuildInputData(self, fn, keydata):
13363 """Build input data structures.
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
13370 for keyname, keytype in keydata:
13371 if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
13374 val = request[keyname]
13375 if not keytype(val):
13376 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13377 " validation, value %s, expected"
13378 " type %s" % (keyname, val, keytype))
13379 self.in_data["request"] = request
13381 self.in_text = serializer.Dump(self.in_data)
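  # Editor's illustrative sketch (not part of the original module): each
  # keytype in the keydata pairs is a predicate from the ht module returning
  # True/False, so validating a request key is a plain call.  Hypothetical
  # helper mirroring the loop in _BuildInputData:
  @staticmethod
  def _ExampleCheckRequestKey(request, keyname, keytype):
    """Returns whether a single request key is present and passes its check."""
    return keyname in request and keytype(request[keyname])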
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
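  # Editor's illustrative sketch (not part of the original module): a result
  # matching _NEVAC_RESULT is a three-element list of moved instances
  # (instance, target group, new nodes), failed instances (instance, reason)
  # and the jobs to submit.  With hypothetical names:
  _EXAMPLE_NEVAC_RESULT_SKETCH = [
    [["web1.example.com", "default", ["node3.example.com"]]],
    [["db1.example.com", "no node with enough memory"]],
    [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]],
    ]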
13407 constants.IALLOCATOR_MODE_ALLOC:
13410 ("name", ht.TString),
13411 ("memory", ht.TInt),
13412 ("disks", ht.TListOf(ht.TDict)),
13413 ("disk_template", ht.TString),
13414 ("os", ht.TString),
13415 ("tags", _STRING_LIST),
13416 ("nics", ht.TListOf(ht.TDict)),
13417 ("vcpus", ht.TInt),
13418 ("hypervisor", ht.TString),
13420 constants.IALLOCATOR_MODE_RELOC:
13421 (_AddRelocateInstance,
13422 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13424 constants.IALLOCATOR_MODE_NODE_EVAC:
13425 (_AddNodeEvacuate, [
13426 ("instances", _STRING_LIST),
13427 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13429 constants.IALLOCATOR_MODE_CHG_GROUP:
13430 (_AddChangeGroup, [
13431 ("instances", _STRING_LIST),
13432 ("target_groups", _STRING_LIST),
13436 def Run(self, name, validate=True, call_fn=None):
13437 """Run an instance allocator and return the results.
13440 if call_fn is None:
13441 call_fn = self.rpc.call_iallocator_runner
13443 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13444 result.Raise("Failure while running the iallocator script")
    self.out_text = result.payload
    if validate:
      self._ValidateResult()
13450 def _ValidateResult(self):
13451 """Process the allocator results.
13453 This will process and if successful save the result in
13454 self.out_data and the other parameters.
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13462 if not isinstance(rdict, dict):
13463 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
13466 if "nodes" in rdict and "result" not in rdict:
13467 rdict["result"] = rdict["nodes"]
13470 for key in "success", "info", "result":
13471 if key not in rdict:
13472 raise errors.OpExecError("Can't parse iallocator results:"
13473 " missing key '%s'" % key)
13474 setattr(self, key, rdict[key])
13476 if not self._result_check(self.result):
13477 raise errors.OpExecError("Iallocator returned invalid result,"
13478 " expected %s, got %s" %
13479 (self._result_check, self.result),
13480 errors.ECODE_INVAL)
13482 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13483 assert self.relocate_from is not None
13484 assert self.required_nodes == 1
13486 node2group = dict((name, ndata["group"])
13487 for (name, ndata) in self.in_data["nodes"].items())
13489 fn = compat.partial(self._NodesToGroups, node2group,
13490 self.in_data["nodegroups"])
13492 instance = self.cfg.GetInstanceInfo(self.name)
13493 request_groups = fn(self.relocate_from + [instance.primary_node])
13494 result_groups = fn(rdict["result"] + [instance.primary_node])
13496 if self.success and not set(result_groups).issubset(request_groups):
13497 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13498 " differ from original groups (%s)" %
13499 (utils.CommaJoin(result_groups),
13500 utils.CommaJoin(request_groups)))
13502 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13503 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13505 self.out_data = rdict
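  # Editor's illustrative sketch (not part of the original module): the
  # external script replies with a serialized object that must at least carry
  # the three keys checked in _ValidateResult.  A minimal successful reply
  # for an allocation could look like this (node names hypothetical):
  _EXAMPLE_IALLOCATOR_REPLY_SKETCH = {
    "success": True,
    "info": "allocation successful",
    "result": ["node3.example.com", "node4.example.com"],
    }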
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)
    return sorted(result)
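# Editor's illustrative sketch (not part of the original module):
# IAllocator._NodesToGroups maps node names to unique, sorted group names,
# falling back to the group UUID when the group itself is unknown and
# silently skipping unknown nodes.  With hypothetical data:
#
#   IAllocator._NodesToGroups({"n1": "uuid-1", "n2": "uuid-2"},
#                             {"uuid-1": {"name": "default"}},
#                             ["n1", "n2", "unknown-node"])
#   == ["default", "uuid-2"]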
13541 class LUTestAllocator(NoHooksLU):
13542 """Run allocator tests.
13544 This LU runs the allocator tests
13547 def CheckPrereq(self):
13548 """Check prerequisites.
    This checks the opcode parameters depending on the requested direction
    and mode.
13553 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13554 for attr in ["memory", "disks", "disk_template",
13555 "os", "tags", "nics", "vcpus"]:
13556 if not hasattr(self.op, attr):
13557 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13558 attr, errors.ECODE_INVAL)
13559 iname = self.cfg.ExpandInstanceName(self.op.name)
13560 if iname is not None:
13561 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13562 iname, errors.ECODE_EXISTS)
13563 if not isinstance(self.op.nics, list):
13564 raise errors.OpPrereqError("Invalid parameter 'nics'",
13565 errors.ECODE_INVAL)
13566 if not isinstance(self.op.disks, list):
13567 raise errors.OpPrereqError("Invalid parameter 'disks'",
13568 errors.ECODE_INVAL)
13569 for row in self.op.disks:
13570 if (not isinstance(row, dict) or
13571 constants.IDISK_SIZE not in row or
13572 not isinstance(row[constants.IDISK_SIZE], int) or
13573 constants.IDISK_MODE not in row or
13574 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13575 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13576 " parameter", errors.ECODE_INVAL)
13577 if self.op.hypervisor is None:
13578 self.op.hypervisor = self.cfg.GetHypervisorType()
13579 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13580 fname = _ExpandInstanceName(self.cfg, self.op.name)
13581 self.op.name = fname
13582 self.relocate_from = \
13583 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13584 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13585 constants.IALLOCATOR_MODE_NODE_EVAC):
13586 if not self.op.instances:
13587 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)
13593 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13594 if self.op.allocator is None:
13595 raise errors.OpPrereqError("Missing allocator name",
13596 errors.ECODE_INVAL)
13597 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13598 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13599 self.op.direction, errors.ECODE_INVAL)
13601 def Exec(self, feedback_fn):
13602 """Run the allocator test.
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text

    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)