4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
66 #: Size of DRBD meta block device
71 """Data container for LU results with jobs.
73 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
74 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
75 contained in the C{jobs} attribute and include the job IDs in the opcode
79 def __init__(self, jobs, **kwargs):
80 """Initializes this class.
82 Additional return values can be specified as keyword arguments.
84     @type jobs: list of lists of L{opcodes.OpCode}
85 @param jobs: A list of lists of opcode objects
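    A minimal usage sketch from an LU's Exec; the opcode and the extra
    keyword argument are purely illustrative::

      # submit one job made of a single opcode and return an extra value
      return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                            summary="verification submitted")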
92 class LogicalUnit(object):
93 """Logical Unit base class.
95 Subclasses must follow these rules:
96 - implement ExpandNames
97 - implement CheckPrereq (except when tasklets are used)
98 - implement Exec (except when tasklets are used)
99 - implement BuildHooksEnv
100 - implement BuildHooksNodes
101 - redefine HPATH and HTYPE
102 - optionally redefine their run requirements:
103 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105 Note that all commands require root permissions.
107 @ivar dry_run_result: the value (if any) that will be returned to the caller
108 in dry-run mode (signalled by opcode dry_run parameter)
115 def __init__(self, processor, op, context, rpc):
116 """Constructor for LogicalUnit.
118 This needs to be overridden in derived classes in order to check op
122 self.proc = processor
124 self.cfg = context.cfg
125 self.glm = context.glm
127 self.owned_locks = context.glm.list_owned
128 self.context = context
130 # Dicts used to declare locking needs to mcpu
131 self.needed_locks = None
132 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134 self.remove_locks = {}
135 # Used to force good behavior when calling helper functions
136 self.recalculate_locks = {}
138 self.Log = processor.Log # pylint: disable=C0103
139 self.LogWarning = processor.LogWarning # pylint: disable=C0103
140 self.LogInfo = processor.LogInfo # pylint: disable=C0103
141 self.LogStep = processor.LogStep # pylint: disable=C0103
142 # support for dry-run
143 self.dry_run_result = None
144 # support for generic debug attribute
145 if (not hasattr(self.op, "debug_level") or
146 not isinstance(self.op.debug_level, int)):
147 self.op.debug_level = 0
152 # Validate opcode parameters and set defaults
153 self.op.Validate(True)
155 self.CheckArguments()
157 def CheckArguments(self):
158 """Check syntactic validity for the opcode arguments.
160 This method is for doing a simple syntactic check and ensure
161 validity of opcode parameters, without any cluster-related
162 checks. While the same can be accomplished in ExpandNames and/or
163     CheckPrereq, doing these separately is better because:
165       - ExpandNames is left as purely a lock-related function
166 - CheckPrereq is run after we have acquired locks (and possible
169 The function is allowed to change the self.op attribute so that
170     later methods need not worry about missing parameters.
175 def ExpandNames(self):
176 """Expand names for this LU.
178 This method is called before starting to execute the opcode, and it should
179 update all the parameters of the opcode to their canonical form (e.g. a
180 short node name must be fully expanded after this method has successfully
181 completed). This way locking, hooks, logging, etc. can work correctly.
183 LUs which implement this method must also populate the self.needed_locks
184 member, as a dict with lock levels as keys, and a list of needed lock names
187 - use an empty dict if you don't need any lock
188 - if you don't need any lock at a particular level omit that level
189 - don't put anything for the BGL level
190 - if you want all locks at a level use locking.ALL_SET as a value
192 If you need to share locks (rather than acquire them exclusively) at one
193 level you can modify self.share_locks, setting a true value (usually 1) for
194 that level. By default locks are not shared.
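    For example, to acquire node locks in shared mode (a sketch; the level
    shown is only an illustration)::

      self.share_locks[locking.LEVEL_NODE] = 1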
196 This function can also define a list of tasklets, which then will be
197 executed in order instead of the usual LU-level CheckPrereq and Exec
198 functions, if those are not defined by the LU.
202 # Acquire all nodes and one instance
203 self.needed_locks = {
204 locking.LEVEL_NODE: locking.ALL_SET,
205 locking.LEVEL_INSTANCE: ['instance1.example.com'],
207 # Acquire just two nodes
208 self.needed_locks = {
209 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
212 self.needed_locks = {} # No, you can't leave it to the default value None
215 # The implementation of this method is mandatory only if the new LU is
216     # concurrent, so that old LUs don't need to be changed all at the same time
219 self.needed_locks = {} # Exclusive LUs don't need locks.
221 raise NotImplementedError
223 def DeclareLocks(self, level):
224 """Declare LU locking needs for a level
226 While most LUs can just declare their locking needs at ExpandNames time,
227 sometimes there's the need to calculate some locks after having acquired
228 the ones before. This function is called just before acquiring locks at a
229 particular level, but after acquiring the ones at lower levels, and permits
230 such calculations. It can be used to modify self.needed_locks, and by
231 default it does nothing.
233 This function is only called if you have something already set in
234 self.needed_locks for the level.
236 @param level: Locking level which is going to be locked
237 @type level: member of ganeti.locking.LEVELS
241 def CheckPrereq(self):
242 """Check prerequisites for this LU.
244 This method should check that the prerequisites for the execution
245 of this LU are fulfilled. It can do internode communication, but
246     it should be idempotent - no cluster or system changes are allowed.
249 The method should raise errors.OpPrereqError in case something is
250 not fulfilled. Its return value is ignored.
252 This method should also update all the parameters of the opcode to
253 their canonical form if it hasn't been done by ExpandNames before.
256 if self.tasklets is not None:
257 for (idx, tl) in enumerate(self.tasklets):
258 logging.debug("Checking prerequisites for tasklet %s/%s",
259 idx + 1, len(self.tasklets))
264 def Exec(self, feedback_fn):
267 This method should implement the actual work. It should raise
268 errors.OpExecError for failures that are somewhat dealt with in
272 if self.tasklets is not None:
273 for (idx, tl) in enumerate(self.tasklets):
274 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
277 raise NotImplementedError
279 def BuildHooksEnv(self):
280 """Build hooks environment for this LU.
283 @return: Dictionary containing the environment that will be used for
284 running the hooks for this LU. The keys of the dict must not be prefixed
285 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
286 will extend the environment with additional variables. If no environment
287 should be defined, an empty dictionary should be returned (not C{None}).
288 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
292 raise NotImplementedError
294 def BuildHooksNodes(self):
295 """Build list of nodes to run LU's hooks.
297 @rtype: tuple; (list, list)
298 @return: Tuple containing a list of node names on which the hook
299 should run before the execution and a list of node names on which the
300 hook should run after the execution. No nodes should be returned as an
301 empty list (and not None).
302 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
306 raise NotImplementedError
308 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
309 """Notify the LU about the results of its hooks.
311 This method is called every time a hooks phase is executed, and notifies
312 the Logical Unit about the hooks' result. The LU can then use it to alter
313 its result based on the hooks. By default the method does nothing and the
314 previous result is passed back unchanged but any LU can define it if it
315 wants to use the local cluster hook-scripts somehow.
317 @param phase: one of L{constants.HOOKS_PHASE_POST} or
318 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
319 @param hook_results: the results of the multi-node hooks rpc call
320     @param feedback_fn: function used to send feedback back to the caller
321 @param lu_result: the previous Exec result this LU had, or None
323 @return: the new Exec result, based on the previous result
327     # API must be kept, thus we ignore the unused-argument and
328     # could-be-a-function pylint warnings
329 # pylint: disable=W0613,R0201
332 def _ExpandAndLockInstance(self):
333 """Helper function to expand and lock an instance.
335 Many LUs that work on an instance take its name in self.op.instance_name
336 and need to expand it and then declare the expanded name for locking. This
337 function does it, and then updates self.op.instance_name to the expanded
338 name. It also initializes needed_locks as a dict, if this hasn't been done
342 if self.needed_locks is None:
343 self.needed_locks = {}
345 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
346 "_ExpandAndLockInstance called with instance-level locks set"
347 self.op.instance_name = _ExpandInstanceName(self.cfg,
348 self.op.instance_name)
349 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351 def _LockInstancesNodes(self, primary_only=False):
352 """Helper function to declare instances' nodes for locking.
354 This function should be called after locking one or more instances to lock
355 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
356 with all primary or secondary nodes for instances already locked and
357 present in self.needed_locks[locking.LEVEL_INSTANCE].
359 It should be called from DeclareLocks, and for safety only works if
360 self.recalculate_locks[locking.LEVEL_NODE] is set.
362     In the future it may grow parameters to just lock some instances' nodes, or
363 to just lock primaries or secondary nodes, if needed.
365     It should be called in DeclareLocks in a way similar to::
367 if level == locking.LEVEL_NODE:
368 self._LockInstancesNodes()
370 @type primary_only: boolean
371 @param primary_only: only lock primary nodes of locked instances
374 assert locking.LEVEL_NODE in self.recalculate_locks, \
375 "_LockInstancesNodes helper function called with no nodes to recalculate"
377     # TODO: check if we've really been called with the instance locks held
379 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
380 # future we might want to have different behaviors depending on the value
381 # of self.recalculate_locks[locking.LEVEL_NODE]
383 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
384 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
385 wanted_nodes.append(instance.primary_node)
387 wanted_nodes.extend(instance.secondary_nodes)
389 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
390 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
391 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
392 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
394 del self.recalculate_locks[locking.LEVEL_NODE]
397 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
398 """Simple LU which runs no hooks.
400 This LU is intended as a parent for other LogicalUnits which will
401 run no hooks, in order to reduce duplicate code.
407 def BuildHooksEnv(self):
408 """Empty BuildHooksEnv for NoHooksLu.
410 This just raises an error.
413 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
415 def BuildHooksNodes(self):
416 """Empty BuildHooksNodes for NoHooksLU.
419 raise AssertionError("BuildHooksNodes called for NoHooksLU")
423 """Tasklet base class.
425 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
426 they can mix legacy code with tasklets. Locking needs to be done in the LU,
427 tasklets know nothing about locks.
429 Subclasses must follow these rules:
430 - Implement CheckPrereq
434 def __init__(self, lu):
441 def CheckPrereq(self):
442 """Check prerequisites for this tasklets.
444 This method should check whether the prerequisites for the execution of
445 this tasklet are fulfilled. It can do internode communication, but it
446 should be idempotent - no cluster or system changes are allowed.
448 The method should raise errors.OpPrereqError in case something is not
449 fulfilled. Its return value is ignored.
451 This method should also update all parameters to their canonical form if it
452 hasn't been done before.
457 def Exec(self, feedback_fn):
458 """Execute the tasklet.
460 This method should implement the actual work. It should raise
461 errors.OpExecError for failures that are somewhat dealt with in code, or
465 raise NotImplementedError
469 """Base for query utility classes.
472 #: Attribute holding field definitions
475 def __init__(self, qfilter, fields, use_locking):
476 """Initializes this class.
479 self.use_locking = use_locking
481 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
483 self.requested_data = self.query.RequestedData()
484 self.names = self.query.RequestedNames()
486 # Sort only if no names were requested
487 self.sort_by_name = not self.names
489 self.do_locking = None
492 def _GetNames(self, lu, all_names, lock_level):
493 """Helper function to determine names asked for in the query.
497 names = lu.owned_locks(lock_level)
501 if self.wanted == locking.ALL_SET:
502 assert not self.names
503 # caller didn't specify names, so ordering is not important
504 return utils.NiceSort(names)
506 # caller specified names and we must keep the same order
508 assert not self.do_locking or lu.glm.is_owned(lock_level)
510 missing = set(self.wanted).difference(names)
512 raise errors.OpExecError("Some items were removed before retrieving"
513 " their data: %s" % missing)
515 # Return expanded names
518 def ExpandNames(self, lu):
519 """Expand names for this query.
521 See L{LogicalUnit.ExpandNames}.
524 raise NotImplementedError()
526 def DeclareLocks(self, lu, level):
527 """Declare locks for this query.
529 See L{LogicalUnit.DeclareLocks}.
532 raise NotImplementedError()
534 def _GetQueryData(self, lu):
535 """Collects all data for this query.
537 @return: Query data object
540 raise NotImplementedError()
542 def NewStyleQuery(self, lu):
543 """Collect data and execute query.
546 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
547 sort_by_name=self.sort_by_name)
549 def OldStyleQuery(self, lu):
550 """Collect data and execute query.
553 return self.query.OldStyleQuery(self._GetQueryData(lu),
554 sort_by_name=self.sort_by_name)
558 """Returns a dict declaring all lock levels shared.
561 return dict.fromkeys(locking.LEVELS, 1)
564 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
565 """Checks if the owned node groups are still correct for an instance.
567 @type cfg: L{config.ConfigWriter}
568 @param cfg: The cluster configuration
569 @type instance_name: string
570 @param instance_name: Instance name
571 @type owned_groups: set or frozenset
572 @param owned_groups: List of currently owned node groups
575 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
577 if not owned_groups.issuperset(inst_groups):
578 raise errors.OpPrereqError("Instance %s's node groups changed since"
579 " locks were acquired, current groups are"
580 " are '%s', owning groups '%s'; retry the"
583 utils.CommaJoin(inst_groups),
584 utils.CommaJoin(owned_groups)),
590 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
591 """Checks if the instances in a node group are still correct.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type group_uuid: string
596 @param group_uuid: Node group UUID
597 @type owned_instances: set or frozenset
598 @param owned_instances: List of currently owned instances
601 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
602 if owned_instances != wanted_instances:
603 raise errors.OpPrereqError("Instances in node group '%s' changed since"
604 " locks were acquired, wanted '%s', have '%s';"
605 " retry the operation" %
607 utils.CommaJoin(wanted_instances),
608 utils.CommaJoin(owned_instances)),
611 return wanted_instances
614 def _SupportsOob(cfg, node):
615 """Tells if node supports OOB.
617 @type cfg: L{config.ConfigWriter}
618 @param cfg: The cluster configuration
619 @type node: L{objects.Node}
620 @param node: The node
621 @return: The OOB script if supported or an empty string otherwise
624 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
627 def _GetWantedNodes(lu, nodes):
628 """Returns list of checked and expanded node names.
630 @type lu: L{LogicalUnit}
631 @param lu: the logical unit on whose behalf we execute
633 @param nodes: list of node names or None for all nodes
635 @return: the list of nodes, sorted
636 @raise errors.ProgrammerError: if the nodes parameter is wrong type
640 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
642 return utils.NiceSort(lu.cfg.GetNodeList())
645 def _GetWantedInstances(lu, instances):
646 """Returns list of checked and expanded instance names.
648 @type lu: L{LogicalUnit}
649 @param lu: the logical unit on whose behalf we execute
650 @type instances: list
651 @param instances: list of instance names or None for all instances
653 @return: the list of instances, sorted
654 @raise errors.OpPrereqError: if the instances parameter is wrong type
655 @raise errors.OpPrereqError: if any of the passed instances is not found
659 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
661 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
665 def _GetUpdatedParams(old_params, update_dict,
666 use_default=True, use_none=False):
667 """Return the new version of a parameter dictionary.
669 @type old_params: dict
670 @param old_params: old parameters
671 @type update_dict: dict
672 @param update_dict: dict containing new parameter values, or
673 constants.VALUE_DEFAULT to reset the parameter to its default
675   @type use_default: boolean
676   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
677       values as 'to be deleted' values
678   @type use_none: boolean
679   @param use_none: whether to recognise C{None} values as 'to be
682 @return: the new parameter dictionary
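
  A worked example (the parameter names and values are illustrative)::

    # old_params   = {"mem": 128, "cpus": 2}
    # update_dict  = {"mem": constants.VALUE_DEFAULT, "cpus": 4}
    # with use_default=True the result is {"cpus": 4}; "mem" is dropped so
    # that it falls back to its default value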
685 params_copy = copy.deepcopy(old_params)
686 for key, val in update_dict.iteritems():
687 if ((use_default and val == constants.VALUE_DEFAULT) or
688 (use_none and val is None)):
694 params_copy[key] = val
698 def _ReleaseLocks(lu, level, names=None, keep=None):
699 """Releases locks owned by an LU.
701 @type lu: L{LogicalUnit}
702 @param level: Lock level
703 @type names: list or None
704 @param names: Names of locks to release
705 @type keep: list or None
706 @param keep: Names of locks to retain
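
  A typical call from within an LU, keeping only the locks still needed
  (the opcode attribute shown is hypothetical)::

    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=[self.op.target_node])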
709 assert not (keep is not None and names is not None), \
710 "Only one of the 'names' and the 'keep' parameters can be given"
712 if names is not None:
713 should_release = names.__contains__
715 should_release = lambda name: name not in keep
717 should_release = None
723 # Determine which locks to release
724 for name in lu.owned_locks(level):
725 if should_release(name):
730 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
732 # Release just some locks
733 lu.glm.release(level, names=release)
735 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
738 lu.glm.release(level)
740 assert not lu.glm.is_owned(level), "No locks should be owned"
743 def _MapInstanceDisksToNodes(instances):
744 """Creates a map from (node, volume) to instance name.
746 @type instances: list of L{objects.Instance}
747 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
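
  The result maps each (node, volume) pair to its owning instance, e.g.
  (names are illustrative)::

    {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
     ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}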
750 return dict(((node, vol), inst.name)
751 for inst in instances
752 for (node, vols) in inst.MapLVsByNode().items()
756 def _RunPostHook(lu, node_name):
757 """Runs the post-hook for an opcode on a single node.
760 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
762 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
764 # pylint: disable=W0702
765 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
768 def _CheckOutputFields(static, dynamic, selected):
769 """Checks whether all selected fields are valid.
771 @type static: L{utils.FieldSet}
772 @param static: static fields set
773 @type dynamic: L{utils.FieldSet}
774 @param dynamic: dynamic fields set
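
  A usage sketch (the field names are illustrative)::

    _CheckOutputFields(static=utils.FieldSet("name"),
                       dynamic=utils.FieldSet("free_memory"),
                       selected=self.op.output_fields)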
781 delta = f.NonMatching(selected)
783 raise errors.OpPrereqError("Unknown output fields selected: %s"
784 % ",".join(delta), errors.ECODE_INVAL)
787 def _CheckGlobalHvParams(params):
788 """Validates that given hypervisor params are not global ones.
790 This will ensure that instances don't get customised versions of
794 used_globals = constants.HVC_GLOBALS.intersection(params)
796 msg = ("The following hypervisor parameters are global and cannot"
797 " be customized at instance level, please modify them at"
798 " cluster level: %s" % utils.CommaJoin(used_globals))
799 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
802 def _CheckNodeOnline(lu, node, msg=None):
803 """Ensure that a given node is online.
805 @param lu: the LU on behalf of which we make the check
806 @param node: the node to check
807 @param msg: if passed, should be a message to replace the default one
808 @raise errors.OpPrereqError: if the node is offline
812 msg = "Can't use offline node"
813 if lu.cfg.GetNodeInfo(node).offline:
814 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
817 def _CheckNodeNotDrained(lu, node):
818 """Ensure that a given node is not drained.
820 @param lu: the LU on behalf of which we make the check
821 @param node: the node to check
822 @raise errors.OpPrereqError: if the node is drained
825 if lu.cfg.GetNodeInfo(node).drained:
826 raise errors.OpPrereqError("Can't use drained node %s" % node,
830 def _CheckNodeVmCapable(lu, node):
831 """Ensure that a given node is vm capable.
833 @param lu: the LU on behalf of which we make the check
834 @param node: the node to check
835 @raise errors.OpPrereqError: if the node is not vm capable
838 if not lu.cfg.GetNodeInfo(node).vm_capable:
839 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
843 def _CheckNodeHasOS(lu, node, os_name, force_variant):
844 """Ensure that a node supports a given OS.
846 @param lu: the LU on behalf of which we make the check
847 @param node: the node to check
848 @param os_name: the OS to query about
849 @param force_variant: whether to ignore variant errors
850 @raise errors.OpPrereqError: if the node is not supporting the OS
853 result = lu.rpc.call_os_get(node, os_name)
854 result.Raise("OS '%s' not in supported OS list for node %s" %
856 prereq=True, ecode=errors.ECODE_INVAL)
857 if not force_variant:
858 _CheckOSVariant(result.payload, os_name)
861 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
862 """Ensure that a node has the given secondary ip.
864 @type lu: L{LogicalUnit}
865 @param lu: the LU on behalf of which we make the check
867 @param node: the node to check
868 @type secondary_ip: string
869 @param secondary_ip: the ip to check
870 @type prereq: boolean
871 @param prereq: whether to throw a prerequisite or an execute error
872 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
873 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
876 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
877 result.Raise("Failure checking secondary ip on node %s" % node,
878 prereq=prereq, ecode=errors.ECODE_ENVIRON)
879 if not result.payload:
880 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
881 " please fix and re-run this command" % secondary_ip)
883 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
885 raise errors.OpExecError(msg)
888 def _GetClusterDomainSecret():
889 """Reads the cluster domain secret.
892 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
896 def _CheckInstanceDown(lu, instance, reason):
897 """Ensure that an instance is not running."""
898 if instance.admin_up:
899 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
900 (instance.name, reason), errors.ECODE_STATE)
902 pnode = instance.primary_node
903 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
904 ins_l.Raise("Can't contact node %s for instance information" % pnode,
905 prereq=True, ecode=errors.ECODE_ENVIRON)
907 if instance.name in ins_l.payload:
908 raise errors.OpPrereqError("Instance %s is running, %s" %
909 (instance.name, reason), errors.ECODE_STATE)
912 def _ExpandItemName(fn, name, kind):
913 """Expand an item name.
915 @param fn: the function to use for expansion
916 @param name: requested item name
917 @param kind: text description ('Node' or 'Instance')
918 @return: the resolved (full) name
919 @raise errors.OpPrereqError: if the item is not found
923 if full_name is None:
924 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
929 def _ExpandNodeName(cfg, name):
930 """Wrapper over L{_ExpandItemName} for nodes."""
931 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
934 def _ExpandInstanceName(cfg, name):
935 """Wrapper over L{_ExpandItemName} for instance."""
936 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
939 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
940 memory, vcpus, nics, disk_template, disks,
941 bep, hvp, hypervisor_name, tags):
942 """Builds instance related env variables for hooks
944 This builds the hook environment from individual variables.
947 @param name: the name of the instance
948 @type primary_node: string
949 @param primary_node: the name of the instance's primary node
950 @type secondary_nodes: list
951 @param secondary_nodes: list of secondary nodes as strings
952 @type os_type: string
953 @param os_type: the name of the instance's OS
954 @type status: boolean
955 @param status: the should_run status of the instance
957 @param memory: the memory size of the instance
959 @param vcpus: the count of VCPUs the instance has
961 @param nics: list of tuples (ip, mac, mode, link) representing
962 the NICs the instance has
963 @type disk_template: string
964 @param disk_template: the disk template of the instance
966 @param disks: the list of (size, mode) pairs
968 @param bep: the backend parameters for the instance
970 @param hvp: the hypervisor parameters for the instance
971 @type hypervisor_name: string
972 @param hypervisor_name: the hypervisor for the instance
974 @param tags: list of instance tags as strings
976 @return: the hook environment for this instance
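
  An excerpt of the resulting environment (values are illustrative; the
  "GANETI_" prefix is added later by the hooks runner)::

    {"INSTANCE_NAME": "inst1.example.com",
     "INSTANCE_PRIMARY": "node1.example.com",
     "INSTANCE_NIC_COUNT": 1,
     "INSTANCE_NIC0_MAC": "aa:00:00:35:cd:1a",
     "INSTANCE_DISK_COUNT": 1,
     "INSTANCE_DISK0_SIZE": 1024}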
985 "INSTANCE_NAME": name,
986 "INSTANCE_PRIMARY": primary_node,
987 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
988 "INSTANCE_OS_TYPE": os_type,
989 "INSTANCE_STATUS": str_status,
990 "INSTANCE_MEMORY": memory,
991 "INSTANCE_VCPUS": vcpus,
992 "INSTANCE_DISK_TEMPLATE": disk_template,
993 "INSTANCE_HYPERVISOR": hypervisor_name,
997 nic_count = len(nics)
998 for idx, (ip, mac, mode, link) in enumerate(nics):
1001 env["INSTANCE_NIC%d_IP" % idx] = ip
1002 env["INSTANCE_NIC%d_MAC" % idx] = mac
1003 env["INSTANCE_NIC%d_MODE" % idx] = mode
1004 env["INSTANCE_NIC%d_LINK" % idx] = link
1005 if mode == constants.NIC_MODE_BRIDGED:
1006 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1010 env["INSTANCE_NIC_COUNT"] = nic_count
1013 disk_count = len(disks)
1014 for idx, (size, mode) in enumerate(disks):
1015 env["INSTANCE_DISK%d_SIZE" % idx] = size
1016 env["INSTANCE_DISK%d_MODE" % idx] = mode
1020 env["INSTANCE_DISK_COUNT"] = disk_count
1025 env["INSTANCE_TAGS"] = " ".join(tags)
1027 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1028 for key, value in source.items():
1029 env["INSTANCE_%s_%s" % (kind, key)] = value
1034 def _NICListToTuple(lu, nics):
1035 """Build a list of nic information tuples.
1037 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1038 value in LUInstanceQueryData.
1040 @type lu: L{LogicalUnit}
1041 @param lu: the logical unit on whose behalf we execute
1042 @type nics: list of L{objects.NIC}
1043 @param nics: list of nics to convert to hooks tuples
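
  Each NIC becomes an (ip, mac, mode, link) tuple, e.g. (values are
  illustrative)::

    [("198.51.100.10", "aa:00:00:35:cd:1a", constants.NIC_MODE_BRIDGED,
      "xen-br0")]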
1047 cluster = lu.cfg.GetClusterInfo()
1051 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1052 mode = filled_params[constants.NIC_MODE]
1053 link = filled_params[constants.NIC_LINK]
1054 hooks_nics.append((ip, mac, mode, link))
1058 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1059 """Builds instance related env variables for hooks from an object.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the logical unit on whose behalf we execute
1063 @type instance: L{objects.Instance}
1064 @param instance: the instance for which we should build the
1066 @type override: dict
1067 @param override: dictionary with key/values that will override
1070 @return: the hook environment dictionary
1073 cluster = lu.cfg.GetClusterInfo()
1074 bep = cluster.FillBE(instance)
1075 hvp = cluster.FillHV(instance)
1077 "name": instance.name,
1078 "primary_node": instance.primary_node,
1079 "secondary_nodes": instance.secondary_nodes,
1080 "os_type": instance.os,
1081 "status": instance.admin_up,
1082 "memory": bep[constants.BE_MEMORY],
1083 "vcpus": bep[constants.BE_VCPUS],
1084 "nics": _NICListToTuple(lu, instance.nics),
1085 "disk_template": instance.disk_template,
1086 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1089 "hypervisor_name": instance.hypervisor,
1090 "tags": instance.tags,
1093 args.update(override)
1094 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1097 def _AdjustCandidatePool(lu, exceptions):
1098 """Adjust the candidate pool after node operations.
1101 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1103 lu.LogInfo("Promoted nodes to master candidate role: %s",
1104 utils.CommaJoin(node.name for node in mod_list))
1105 for name in mod_list:
1106 lu.context.ReaddNode(name)
1107 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1109 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1113 def _DecideSelfPromotion(lu, exceptions=None):
1114 """Decide whether I should promote myself as a master candidate.
1117 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1118 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1119   # the new node will increase mc_max by one, so:
1120 mc_should = min(mc_should + 1, cp_size)
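  # e.g. (illustrative numbers) with candidate_pool_size=10, mc_now=3 and
  # mc_should=3 before the adjustment, mc_should becomes 4 and the new node
  # promotes itself (3 < 4)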
1121 return mc_now < mc_should
1124 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1125 """Check that the brigdes needed by a list of nics exist.
1128 cluster = lu.cfg.GetClusterInfo()
1129 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1130 brlist = [params[constants.NIC_LINK] for params in paramslist
1131 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1133 result = lu.rpc.call_bridges_exist(target_node, brlist)
1134 result.Raise("Error checking bridges on destination node '%s'" %
1135 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1138 def _CheckInstanceBridgesExist(lu, instance, node=None):
1139 """Check that the brigdes needed by an instance exist.
1143 node = instance.primary_node
1144 _CheckNicsBridgesExist(lu, instance.nics, node)
1147 def _CheckOSVariant(os_obj, name):
1148 """Check whether an OS name conforms to the os variants specification.
1150 @type os_obj: L{objects.OS}
1151 @param os_obj: OS object to check
1153 @param name: OS name passed by the user, to check for validity
1156 variant = objects.OS.GetVariant(name)
1157 if not os_obj.supported_variants:
1159 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1160 " passed)" % (os_obj.name, variant),
1164 raise errors.OpPrereqError("OS name must include a variant",
1167 if variant not in os_obj.supported_variants:
1168 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1171 def _GetNodeInstancesInner(cfg, fn):
1172 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1175 def _GetNodeInstances(cfg, node_name):
1176 """Returns a list of all primary and secondary instances on a node.
1180 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1183 def _GetNodePrimaryInstances(cfg, node_name):
1184 """Returns primary instances on a node.
1187 return _GetNodeInstancesInner(cfg,
1188 lambda inst: node_name == inst.primary_node)
1191 def _GetNodeSecondaryInstances(cfg, node_name):
1192 """Returns secondary instances on a node.
1195 return _GetNodeInstancesInner(cfg,
1196 lambda inst: node_name in inst.secondary_nodes)
1199 def _GetStorageTypeArgs(cfg, storage_type):
1200 """Returns the arguments for a storage type.
1203 # Special case for file storage
1204 if storage_type == constants.ST_FILE:
1205 # storage.FileStorage wants a list of storage directories
1206 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1211 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1214 for dev in instance.disks:
1215 cfg.SetDiskID(dev, node_name)
1217 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1218 result.Raise("Failed to get disk status from node %s" % node_name,
1219 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1221 for idx, bdev_status in enumerate(result.payload):
1222 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1228 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1229 """Check the sanity of iallocator and node arguments and use the
1230 cluster-wide iallocator if appropriate.
1232 Check that at most one of (iallocator, node) is specified. If none is
1233 specified, then the LU's opcode's iallocator slot is filled with the
1234 cluster-wide default iallocator.
1236 @type iallocator_slot: string
1237 @param iallocator_slot: the name of the opcode iallocator slot
1238 @type node_slot: string
1239 @param node_slot: the name of the opcode target node slot
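
  A usage sketch from an LU's CheckArguments (the slot names depend on the
  opcode and are only an example here)::

    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")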
1242 node = getattr(lu.op, node_slot, None)
1243 iallocator = getattr(lu.op, iallocator_slot, None)
1245 if node is not None and iallocator is not None:
1246 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1248 elif node is None and iallocator is None:
1249 default_iallocator = lu.cfg.GetDefaultIAllocator()
1250 if default_iallocator:
1251 setattr(lu.op, iallocator_slot, default_iallocator)
1253 raise errors.OpPrereqError("No iallocator or node given and no"
1254 " cluster-wide default iallocator found;"
1255 " please specify either an iallocator or a"
1256 " node, or set a cluster-wide default"
1260 def _GetDefaultIAllocator(cfg, iallocator):
1261 """Decides on which iallocator to use.
1263 @type cfg: L{config.ConfigWriter}
1264 @param cfg: Cluster configuration object
1265 @type iallocator: string or None
1266 @param iallocator: Iallocator specified in opcode
1268 @return: Iallocator name
1272 # Use default iallocator
1273 iallocator = cfg.GetDefaultIAllocator()
1276 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1277 " opcode nor as a cluster-wide default",
1283 class LUClusterPostInit(LogicalUnit):
1284 """Logical unit for running hooks after cluster initialization.
1287 HPATH = "cluster-init"
1288 HTYPE = constants.HTYPE_CLUSTER
1290 def BuildHooksEnv(self):
1295 "OP_TARGET": self.cfg.GetClusterName(),
1298 def BuildHooksNodes(self):
1299 """Build hooks nodes.
1302 return ([], [self.cfg.GetMasterNode()])
1304 def Exec(self, feedback_fn):
1311 class LUClusterDestroy(LogicalUnit):
1312 """Logical unit for destroying the cluster.
1315 HPATH = "cluster-destroy"
1316 HTYPE = constants.HTYPE_CLUSTER
1318 def BuildHooksEnv(self):
1323 "OP_TARGET": self.cfg.GetClusterName(),
1326 def BuildHooksNodes(self):
1327 """Build hooks nodes.
1332 def CheckPrereq(self):
1333 """Check prerequisites.
1335 This checks whether the cluster is empty.
1337 Any errors are signaled by raising errors.OpPrereqError.
1340 master = self.cfg.GetMasterNode()
1342 nodelist = self.cfg.GetNodeList()
1343 if len(nodelist) != 1 or nodelist[0] != master:
1344 raise errors.OpPrereqError("There are still %d node(s) in"
1345 " this cluster." % (len(nodelist) - 1),
1347 instancelist = self.cfg.GetInstanceList()
1349 raise errors.OpPrereqError("There are still %d instance(s) in"
1350 " this cluster." % len(instancelist),
1353 def Exec(self, feedback_fn):
1354 """Destroys the cluster.
1357 master = self.cfg.GetMasterNode()
1359 # Run post hooks on master node before it's removed
1360 _RunPostHook(self, master)
1362 result = self.rpc.call_node_deactivate_master_ip(master)
1363 result.Raise("Could not disable the master role")
1368 def _VerifyCertificate(filename):
1369 """Verifies a certificate for L{LUClusterVerifyConfig}.
1371 @type filename: string
1372 @param filename: Path to PEM file
1376 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1377 utils.ReadFile(filename))
1378 except Exception, err: # pylint: disable=W0703
1379 return (LUClusterVerifyConfig.ETYPE_ERROR,
1380 "Failed to load X509 certificate %s: %s" % (filename, err))
1383 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1384 constants.SSL_CERT_EXPIRATION_ERROR)
1387 fnamemsg = "While verifying %s: %s" % (filename, msg)
1392 return (None, fnamemsg)
1393 elif errcode == utils.CERT_WARNING:
1394 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1395 elif errcode == utils.CERT_ERROR:
1396 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1398 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1401 def _GetAllHypervisorParameters(cluster, instances):
1402 """Compute the set of all hypervisor parameters.
1404 @type cluster: L{objects.Cluster}
1405 @param cluster: the cluster object
1406   @type instances: list of L{objects.Instance}
1407 @param instances: additional instances from which to obtain parameters
1408 @rtype: list of (origin, hypervisor, parameters)
1409 @return: a list with all parameters found, indicating the hypervisor they
1410 apply to, and the origin (can be "cluster", "os X", or "instance Y")
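
  An illustrative return value (hypervisor name and parameters are only an
  example)::

    [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-2.6-xenU"}),
     ("os debian-image", "xen-pvm", {"kernel_path": "/boot/vmlinuz-2.6-xenU"}),
     ("instance inst1.example.com", "xen-pvm",
      {"kernel_path": "/boot/vmlinuz-2.6-xenU"})]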
1415 for hv_name in cluster.enabled_hypervisors:
1416 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1418 for os_name, os_hvp in cluster.os_hvp.items():
1419 for hv_name, hv_params in os_hvp.items():
1421 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1422 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1424 # TODO: collapse identical parameter values in a single one
1425 for instance in instances:
1426 if instance.hvparams:
1427 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1428 cluster.FillHV(instance)))
1433 class _VerifyErrors(object):
1434 """Mix-in for cluster/group verify LUs.
1436 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1437 self.op and self._feedback_fn to be available.)
1441 ETYPE_FIELD = "code"
1442 ETYPE_ERROR = "ERROR"
1443 ETYPE_WARNING = "WARNING"
1445 def _Error(self, ecode, item, msg, *args, **kwargs):
1446 """Format an error message.
1448 Based on the opcode's error_codes parameter, either format a
1449 parseable error code, or a simpler error string.
1451 This must be called only from Exec and functions called from Exec.
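
    Depending on C{self.op.error_codes} the reported line is either the
    machine-parseable or the human-readable form; both lines below are
    illustrative only::

      ERROR:ENODESSH:node:node1.example.com:ssh communication failed
      ERROR: node node1.example.com: ssh communication failed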
1454 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1455 itype, etxt, _ = ecode
1456 # first complete the msg
1459 # then format the whole message
1460 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1461 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1467 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1468 # and finally report it via the feedback_fn
1469 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1471 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1472 """Log an error message if the passed condition is True.
1476 or self.op.debug_simulate_errors) # pylint: disable=E1101
1478     # If the error code is in the list of ignored errors, demote the error to a warning
1480 (_, etxt, _) = ecode
1481 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1482 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1485 self._Error(ecode, *args, **kwargs)
1487 # do not mark the operation as failed for WARN cases only
1488 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1489 self.bad = self.bad or cond
1492 class LUClusterVerify(NoHooksLU):
1493 """Submits all jobs necessary to verify the cluster.
1498 def ExpandNames(self):
1499 self.needed_locks = {}
1501 def Exec(self, feedback_fn):
1504 if self.op.group_name:
1505 groups = [self.op.group_name]
1506 depends_fn = lambda: None
1508 groups = self.cfg.GetNodeGroupList()
1510 # Verify global configuration
1512 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1515 # Always depend on global verification
1516 depends_fn = lambda: [(-len(jobs), [])]
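      # The dependency is expressed relative to this submission: each
      # group-verification job added below refers back to the job submitted
      # len(jobs) positions earlier, i.e. the config-verification job above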
1518 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1519 ignore_errors=self.op.ignore_errors,
1520 depends=depends_fn())]
1521 for group in groups)
1523 # Fix up all parameters
1524 for op in itertools.chain(*jobs): # pylint: disable=W0142
1525 op.debug_simulate_errors = self.op.debug_simulate_errors
1526 op.verbose = self.op.verbose
1527 op.error_codes = self.op.error_codes
1529 op.skip_checks = self.op.skip_checks
1530 except AttributeError:
1531 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1533 return ResultWithJobs(jobs)
1536 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1537 """Verifies the cluster config.
1542 def _VerifyHVP(self, hvp_data):
1543 """Verifies locally the syntax of the hypervisor parameters.
1546 for item, hv_name, hv_params in hvp_data:
1547 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1550 hv_class = hypervisor.GetHypervisor(hv_name)
1551 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1552 hv_class.CheckParameterSyntax(hv_params)
1553 except errors.GenericError, err:
1554 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1556 def ExpandNames(self):
1557     # Information can be safely retrieved as the BGL is acquired in exclusive mode
1559 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1560 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1561 self.all_node_info = self.cfg.GetAllNodesInfo()
1562 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1563 self.needed_locks = {}
1565 def Exec(self, feedback_fn):
1566 """Verify integrity of cluster, performing various test on nodes.
1570 self._feedback_fn = feedback_fn
1572 feedback_fn("* Verifying cluster config")
1574 for msg in self.cfg.VerifyConfig():
1575 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1577 feedback_fn("* Verifying cluster certificate files")
1579 for cert_filename in constants.ALL_CERT_FILES:
1580 (errcode, msg) = _VerifyCertificate(cert_filename)
1581 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1583 feedback_fn("* Verifying hypervisor parameters")
1585 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1586 self.all_inst_info.values()))
1588 feedback_fn("* Verifying all nodes belong to an existing group")
1590 # We do this verification here because, should this bogus circumstance
1591 # occur, it would never be caught by VerifyGroup, which only acts on
1592 # nodes/instances reachable from existing node groups.
1594 dangling_nodes = set(node.name for node in self.all_node_info.values()
1595 if node.group not in self.all_group_info)
1597 dangling_instances = {}
1598 no_node_instances = []
1600 for inst in self.all_inst_info.values():
1601 if inst.primary_node in dangling_nodes:
1602 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1603 elif inst.primary_node not in self.all_node_info:
1604 no_node_instances.append(inst.name)
1609 utils.CommaJoin(dangling_instances.get(node.name,
1611 for node in dangling_nodes]
1613 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1615 "the following nodes (and their instances) belong to a non"
1616 " existing group: %s", utils.CommaJoin(pretty_dangling))
1618 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1620 "the following instances have a non-existing primary-node:"
1621 " %s", utils.CommaJoin(no_node_instances))
1626 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1627 """Verifies the status of a node group.
1630 HPATH = "cluster-verify"
1631 HTYPE = constants.HTYPE_CLUSTER
1634 _HOOKS_INDENT_RE = re.compile("^", re.M)
1636 class NodeImage(object):
1637 """A class representing the logical and physical status of a node.
1640 @ivar name: the node name to which this object refers
1641 @ivar volumes: a structure as returned from
1642 L{ganeti.backend.GetVolumeList} (runtime)
1643 @ivar instances: a list of running instances (runtime)
1644 @ivar pinst: list of configured primary instances (config)
1645 @ivar sinst: list of configured secondary instances (config)
1646 @ivar sbp: dictionary of {primary-node: list of instances} for all
1647 instances for which this node is secondary (config)
1648 @ivar mfree: free memory, as reported by hypervisor (runtime)
1649 @ivar dfree: free disk, as reported by the node (runtime)
1650 @ivar offline: the offline status (config)
1651 @type rpc_fail: boolean
1652     @ivar rpc_fail: whether the RPC verify call was successful (overall,
1653 not whether the individual keys were correct) (runtime)
1654 @type lvm_fail: boolean
1655 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1656 @type hyp_fail: boolean
1657 @ivar hyp_fail: whether the RPC call didn't return the instance list
1658 @type ghost: boolean
1659 @ivar ghost: whether this is a known node or not (config)
1660 @type os_fail: boolean
1661 @ivar os_fail: whether the RPC call didn't return valid OS data
1663 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1664 @type vm_capable: boolean
1665 @ivar vm_capable: whether the node can host instances
1668 def __init__(self, offline=False, name=None, vm_capable=True):
1677 self.offline = offline
1678 self.vm_capable = vm_capable
1679 self.rpc_fail = False
1680 self.lvm_fail = False
1681 self.hyp_fail = False
1683 self.os_fail = False
1686 def ExpandNames(self):
1687 # This raises errors.OpPrereqError on its own:
1688 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1690 # Get instances in node group; this is unsafe and needs verification later
1691 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1693 self.needed_locks = {
1694 locking.LEVEL_INSTANCE: inst_names,
1695 locking.LEVEL_NODEGROUP: [self.group_uuid],
1696 locking.LEVEL_NODE: [],
1699 self.share_locks = _ShareAll()
1701 def DeclareLocks(self, level):
1702 if level == locking.LEVEL_NODE:
1703 # Get members of node group; this is unsafe and needs verification later
1704 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1706 all_inst_info = self.cfg.GetAllInstancesInfo()
1708 # In Exec(), we warn about mirrored instances that have primary and
1709 # secondary living in separate node groups. To fully verify that
1710 # volumes for these instances are healthy, we will need to do an
1711 # extra call to their secondaries. We ensure here those nodes will
1713 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1714 # Important: access only the instances whose lock is owned
1715 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1716 nodes.update(all_inst_info[inst].secondary_nodes)
1718 self.needed_locks[locking.LEVEL_NODE] = nodes
1720 def CheckPrereq(self):
1721 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1722 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1724 group_nodes = set(self.group_info.members)
1725 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1728 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1730 unlocked_instances = \
1731 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1734 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1735 utils.CommaJoin(unlocked_nodes))
1737 if unlocked_instances:
1738 raise errors.OpPrereqError("Missing lock for instances: %s" %
1739 utils.CommaJoin(unlocked_instances))
1741 self.all_node_info = self.cfg.GetAllNodesInfo()
1742 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1744 self.my_node_names = utils.NiceSort(group_nodes)
1745 self.my_inst_names = utils.NiceSort(group_instances)
1747 self.my_node_info = dict((name, self.all_node_info[name])
1748 for name in self.my_node_names)
1750 self.my_inst_info = dict((name, self.all_inst_info[name])
1751 for name in self.my_inst_names)
1753 # We detect here the nodes that will need the extra RPC calls for verifying
1754 # split LV volumes; they should be locked.
1755 extra_lv_nodes = set()
1757 for inst in self.my_inst_info.values():
1758 if inst.disk_template in constants.DTS_INT_MIRROR:
1759 group = self.my_node_info[inst.primary_node].group
1760 for nname in inst.secondary_nodes:
1761 if self.all_node_info[nname].group != group:
1762 extra_lv_nodes.add(nname)
1764 unlocked_lv_nodes = \
1765 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1767 if unlocked_lv_nodes:
1768       raise errors.OpPrereqError("Missing lock for LV-check nodes: %s" %
1769 utils.CommaJoin(unlocked_lv_nodes))
1770 self.extra_lv_nodes = list(extra_lv_nodes)
1772 def _VerifyNode(self, ninfo, nresult):
1773 """Perform some basic validation on data returned from a node.
1775 - check the result data structure is well formed and has all the
1777 - check ganeti version
1779 @type ninfo: L{objects.Node}
1780 @param ninfo: the node to check
1781 @param nresult: the results from the node
1783 @return: whether overall this call was successful (and we can expect
1784         reasonable values in the response)
1788 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1790 # main result, nresult should be a non-empty dict
1791 test = not nresult or not isinstance(nresult, dict)
1792 _ErrorIf(test, constants.CV_ENODERPC, node,
1793 "unable to verify node: no data returned")
1797 # compares ganeti version
1798 local_version = constants.PROTOCOL_VERSION
1799 remote_version = nresult.get("version", None)
1800 test = not (remote_version and
1801 isinstance(remote_version, (list, tuple)) and
1802 len(remote_version) == 2)
1803 _ErrorIf(test, constants.CV_ENODERPC, node,
1804 "connection to node returned invalid data")
1808 test = local_version != remote_version[0]
1809 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1810 "incompatible protocol versions: master %s,"
1811 " node %s", local_version, remote_version[0])
1815 # node seems compatible, we can actually try to look into its results
1817 # full package version
1818 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1819 constants.CV_ENODEVERSION, node,
1820 "software version mismatch: master %s, node %s",
1821 constants.RELEASE_VERSION, remote_version[1],
1822 code=self.ETYPE_WARNING)
1824 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1825 if ninfo.vm_capable and isinstance(hyp_result, dict):
1826 for hv_name, hv_result in hyp_result.iteritems():
1827 test = hv_result is not None
1828 _ErrorIf(test, constants.CV_ENODEHV, node,
1829 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1831 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1832 if ninfo.vm_capable and isinstance(hvp_result, list):
1833 for item, hv_name, hv_result in hvp_result:
1834 _ErrorIf(True, constants.CV_ENODEHV, node,
1835 "hypervisor %s parameter verify failure (source %s): %s",
1836 hv_name, item, hv_result)
1838 test = nresult.get(constants.NV_NODESETUP,
1839 ["Missing NODESETUP results"])
1840 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1845 def _VerifyNodeTime(self, ninfo, nresult,
1846 nvinfo_starttime, nvinfo_endtime):
1847 """Check the node time.
1849 @type ninfo: L{objects.Node}
1850 @param ninfo: the node to check
1851 @param nresult: the remote results for the node
1852 @param nvinfo_starttime: the start time of the RPC call
1853 @param nvinfo_endtime: the end time of the RPC call
1857 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1859 ntime = nresult.get(constants.NV_TIME, None)
1861 ntime_merged = utils.MergeTime(ntime)
1862 except (ValueError, TypeError):
1863 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1866 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1867 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1868 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1869 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1873 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1874 "Node time diverges by at least %s from master node time",
1877 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1878 """Check the node LVM results.
1880 @type ninfo: L{objects.Node}
1881 @param ninfo: the node to check
1882 @param nresult: the remote results for the node
1883 @param vg_name: the configured VG name
1890 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1892 # checks vg existence and size > 20G
1893 vglist = nresult.get(constants.NV_VGLIST, None)
1895 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1897 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1898 constants.MIN_VG_SIZE)
1899 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1902 pvlist = nresult.get(constants.NV_PVLIST, None)
1903 test = pvlist is None
1904 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1906 # check that ':' is not present in PV names, since it's a
1907 # special character for lvcreate (denotes the range of PEs to
1909 for _, pvname, owner_vg in pvlist:
1910 test = ":" in pvname
1911 _ErrorIf(test, constants.CV_ENODELVM, node,
1912 "Invalid character ':' in PV '%s' of VG '%s'",
1915 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1916 """Check the node bridges.
1918 @type ninfo: L{objects.Node}
1919 @param ninfo: the node to check
1920 @param nresult: the remote results for the node
1921 @param bridges: the expected list of bridges
1928 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1930 missing = nresult.get(constants.NV_BRIDGES, None)
1931 test = not isinstance(missing, list)
1932 _ErrorIf(test, constants.CV_ENODENET, node,
1933 "did not return valid bridge information")
1935 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1936 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1938 def _VerifyNodeNetwork(self, ninfo, nresult):
1939 """Check the node network connectivity results.
1941 @type ninfo: L{objects.Node}
1942 @param ninfo: the node to check
1943 @param nresult: the remote results for the node
1947 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1949 test = constants.NV_NODELIST not in nresult
1950 _ErrorIf(test, constants.CV_ENODESSH, node,
1951 "node hasn't returned node ssh connectivity data")
1953 if nresult[constants.NV_NODELIST]:
1954 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1955 _ErrorIf(True, constants.CV_ENODESSH, node,
1956 "ssh communication with node '%s': %s", a_node, a_msg)
1958 test = constants.NV_NODENETTEST not in nresult
1959 _ErrorIf(test, constants.CV_ENODENET, node,
1960 "node hasn't returned node tcp connectivity data")
1962 if nresult[constants.NV_NODENETTEST]:
1963 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1965 _ErrorIf(True, constants.CV_ENODENET, node,
1966 "tcp communication with node '%s': %s",
1967 anode, nresult[constants.NV_NODENETTEST][anode])
1969 test = constants.NV_MASTERIP not in nresult
1970 _ErrorIf(test, constants.CV_ENODENET, node,
1971 "node hasn't returned node master IP reachability data")
1973 if not nresult[constants.NV_MASTERIP]:
1974 if node == self.master_node:
1975 msg = "the master node cannot reach the master IP (not configured?)"
1977 msg = "cannot reach the master IP"
1978 _ErrorIf(True, constants.CV_ENODENET, node, msg)
1980 def _VerifyInstance(self, instance, instanceconfig, node_image,
1982 """Verify an instance.
1984 This function checks to see if the required block devices are
1985 available on the instance's node.
1988 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1989 node_current = instanceconfig.primary_node
1991 node_vol_should = {}
1992 instanceconfig.MapLVsByNode(node_vol_should)
1994 for node in node_vol_should:
1995 n_img = node_image[node]
1996 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1997 # ignore missing volumes on offline or broken nodes
1999 for volume in node_vol_should[node]:
2000 test = volume not in n_img.volumes
2001 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2002 "volume %s missing on node %s", volume, node)
2004 if instanceconfig.admin_up:
2005 pri_img = node_image[node_current]
2006 test = instance not in pri_img.instances and not pri_img.offline
2007 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2008 "instance not running on its primary node %s",
2011 diskdata = [(nname, success, status, idx)
2012 for (nname, disks) in diskstatus.items()
2013 for idx, (success, status) in enumerate(disks)]
2015 for nname, success, bdev_status, idx in diskdata:
2016 # the 'ghost node' construction in Exec() ensures that we have a
2018 snode = node_image[nname]
2019 bad_snode = snode.ghost or snode.offline
2020 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2021 constants.CV_EINSTANCEFAULTYDISK, instance,
2022 "couldn't retrieve status for disk/%s on %s: %s",
2023 idx, nname, bdev_status)
2024 _ErrorIf((instanceconfig.admin_up and success and
2025 bdev_status.ldisk_status == constants.LDS_FAULTY),
2026 constants.CV_EINSTANCEFAULTYDISK, instance,
2027 "disk/%s on %s is faulty", idx, nname)
2029 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2030 """Verify if there are any unknown volumes in the cluster.
2032 The .os, .swap and backup volumes are ignored. All other volumes are
2033 reported as unknown.
2035 @type reserved: L{ganeti.utils.FieldSet}
2036 @param reserved: a FieldSet of reserved volume names
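# Illustrative example: a volume such as "xenvg/stray-lv" found on a healthy
# node, not expected by any instance on that node and not matching a
# reserved pattern, is reported as CV_ENODEORPHANLV below.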
2039 for node, n_img in node_image.items():
2040 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2041 # skip non-healthy nodes
2043 for volume in n_img.volumes:
2044 test = ((node not in node_vol_should or
2045 volume not in node_vol_should[node]) and
2046 not reserved.Matches(volume))
2047 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2048 "volume %s is unknown", volume)
2050 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2051 """Verify N+1 Memory Resilience.
2053 Check that if one single node dies we can still start all the
2054 instances it was primary for.
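# Worked example (invented numbers): if node B is primary for two
# auto-balanced instances of 2048 MiB each and node A is their secondary,
# node A needs 4096 MiB free; an mfree of 2048 MiB on A triggers CV_ENODEN1.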
2057 cluster_info = self.cfg.GetClusterInfo()
2058 for node, n_img in node_image.items():
2059 # This code checks that every node which is now listed as
2060 # secondary has enough memory to host all instances it is
2061 # supposed to, should a single other node in the cluster fail.
2062 # FIXME: not ready for failover to an arbitrary node
2063 # FIXME: does not support file-backed instances
2064 # WARNING: we currently take into account down instances as well
2065 # as up ones, considering that even if they're down someone
2066 # might want to start them even in the event of a node failure.
2068 # we're skipping offline nodes from the N+1 warning, since
2069 # most likely we don't have good memory information from them;
2070 # we already list instances living on such nodes, and that's
2073 for prinode, instances in n_img.sbp.items():
2075 for instance in instances:
2076 bep = cluster_info.FillBE(instance_cfg[instance])
2077 if bep[constants.BE_AUTO_BALANCE]:
2078 needed_mem += bep[constants.BE_MEMORY]
2079 test = n_img.mfree < needed_mem
2080 self._ErrorIf(test, constants.CV_ENODEN1, node,
2081 "not enough memory to accomodate instance failovers"
2082 " should node %s fail (%dMiB needed, %dMiB available)",
2083 prinode, needed_mem, n_img.mfree)
2086 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2087 (files_all, files_opt, files_mc, files_vm)):
2088 """Verifies file checksums collected from all nodes.
2090 @param errorif: Callback for reporting errors
2091 @param nodeinfo: List of L{objects.Node} objects
2092 @param master_node: Name of master node
2093 @param all_nvinfo: RPC results
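# Illustrative (assumed) shape of the structures built below:
#   nodefiles ~ {"/path/to/file": frozenset(["node1", "node2"]), ...}
#   fileinfo  ~ {"/path/to/file": {"0123abcd...": set(["node1"])}}
# i.e. per file, the nodes expected to have it and the checksums seen.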
2096 # Define functions determining which nodes to consider for a file
2099 (files_mc, lambda node: (node.master_candidate or
2100 node.name == master_node)),
2101 (files_vm, lambda node: node.vm_capable),
2104 # Build mapping from filename to list of nodes which should have the file
2106 for (files, fn) in files2nodefn:
2108 filenodes = nodeinfo
2110 filenodes = filter(fn, nodeinfo)
2111 nodefiles.update((filename,
2112 frozenset(map(operator.attrgetter("name"), filenodes)))
2113 for filename in files)
2115 assert set(nodefiles) == (files_all | files_mc | files_vm)
2117 fileinfo = dict((filename, {}) for filename in nodefiles)
2118 ignore_nodes = set()
2120 for node in nodeinfo:
2122 ignore_nodes.add(node.name)
2125 nresult = all_nvinfo[node.name]
2127 if nresult.fail_msg or not nresult.payload:
2130 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2132 test = not (node_files and isinstance(node_files, dict))
2133 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2134 "Node did not return file checksum data")
2136 ignore_nodes.add(node.name)
2139 # Build per-checksum mapping from filename to nodes having it
2140 for (filename, checksum) in node_files.items():
2141 assert filename in nodefiles
2142 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2144 for (filename, checksums) in fileinfo.items():
2145 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2147 # Nodes having the file
2148 with_file = frozenset(node_name
2149 for nodes in fileinfo[filename].values()
2150 for node_name in nodes) - ignore_nodes
2152 expected_nodes = nodefiles[filename] - ignore_nodes
2154 # Nodes missing file
2155 missing_file = expected_nodes - with_file
2157 if filename in files_opt:
2159 errorif(missing_file and missing_file != expected_nodes,
2160 constants.CV_ECLUSTERFILECHECK, None,
2161 "File %s is optional, but it must exist on all or no"
2162 " nodes (not found on %s)",
2163 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2165 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2166 "File %s is missing from node(s) %s", filename,
2167 utils.CommaJoin(utils.NiceSort(missing_file)))
2169 # Warn if a node has a file it shouldn't
2170 unexpected = with_file - expected_nodes
2172 constants.CV_ECLUSTERFILECHECK, None,
2173 "File %s should not exist on node(s) %s",
2174 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2176 # See if there are multiple versions of the file
2177 test = len(checksums) > 1
2179 variants = ["variant %s on %s" %
2180 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2181 for (idx, (checksum, nodes)) in
2182 enumerate(sorted(checksums.items()))]
2186 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2187 "File %s found with %s different checksums (%s)",
2188 filename, len(checksums), "; ".join(variants))
2190 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2192 """Verifies and the node DRBD status.
2194 @type ninfo: L{objects.Node}
2195 @param ninfo: the node to check
2196 @param nresult: the remote results for the node
2197 @param instanceinfo: the dict of instances
2198 @param drbd_helper: the configured DRBD usermode helper
2199 @param drbd_map: the DRBD map as returned by
2200 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
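# The helper mapping built below is roughly (illustration only):
#   node_drbd ~ {0: ("instance1.example.com", True), 3: ("ghost-inst", False)}
# i.e. DRBD minor -> (instance name, whether the minor must be in use).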
2204 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2207 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2208 test = (helper_result is None)
2209 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2210 "no drbd usermode helper returned")
2212 status, payload = helper_result
2214 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2215 "drbd usermode helper check unsuccessful: %s", payload)
2216 test = status and (payload != drbd_helper)
2217 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2218 "wrong drbd usermode helper: %s", payload)
2220 # compute the DRBD minors
2222 for minor, instance in drbd_map[node].items():
2223 test = instance not in instanceinfo
2224 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2225 "ghost instance '%s' in temporary DRBD map", instance)
2226 # ghost instance should not be running, but otherwise we
2227 # don't give double warnings (both ghost instance and
2228 # unallocated minor in use)
2230 node_drbd[minor] = (instance, False)
2232 instance = instanceinfo[instance]
2233 node_drbd[minor] = (instance.name, instance.admin_up)
2235 # and now check them
2236 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2237 test = not isinstance(used_minors, (tuple, list))
2238 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2239 "cannot parse drbd status file: %s", str(used_minors))
2241 # we cannot check drbd status
2244 for minor, (iname, must_exist) in node_drbd.items():
2245 test = minor not in used_minors and must_exist
2246 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2247 "drbd minor %d of instance %s is not active", minor, iname)
2248 for minor in used_minors:
2249 test = minor not in node_drbd
2250 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2251 "unallocated drbd minor %d is in use", minor)
2253 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2254 """Builds the node OS structures.
2256 @type ninfo: L{objects.Node}
2257 @param ninfo: the node to check
2258 @param nresult: the remote results for the node
2259 @param nimg: the node image object
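# Each NV_OSLIST entry is expected to be a 7-element list:
#   [name, path, status, diagnose, variants, parameters, api_versions]
# and is folded below into nimg.oslist, keyed by OS name.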
2263 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2265 remote_os = nresult.get(constants.NV_OSLIST, None)
2266 test = (not isinstance(remote_os, list) or
2267 not compat.all(isinstance(v, list) and len(v) == 7
2268 for v in remote_os))
2270 _ErrorIf(test, constants.CV_ENODEOS, node,
2271 "node hasn't returned valid OS data")
2280 for (name, os_path, status, diagnose,
2281 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2283 if name not in os_dict:
2286 # parameters is a list of lists instead of list of tuples due to
2287 # JSON lacking a real tuple type, fix it:
2288 parameters = [tuple(v) for v in parameters]
2289 os_dict[name].append((os_path, status, diagnose,
2290 set(variants), set(parameters), set(api_ver)))
2292 nimg.oslist = os_dict
2294 def _VerifyNodeOS(self, ninfo, nimg, base):
2295 """Verifies the node OS list.
2297 @type ninfo: L{objects.Node}
2298 @param ninfo: the node to check
2299 @param nimg: the node image object
2300 @param base: the 'template' node we match against (e.g. from the master)
2304 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2308 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2309 for os_name, os_data in nimg.oslist.items():
2310 assert os_data, "Empty OS status for OS %s?!" % os_name
2311 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2312 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2313 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2314 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2315 "OS '%s' has multiple entries (first one shadows the rest): %s",
2316 os_name, utils.CommaJoin([v[0] for v in os_data]))
2317 # comparisons with the 'base' image
2318 test = os_name not in base.oslist
2319 _ErrorIf(test, constants.CV_ENODEOS, node,
2320 "Extra OS %s not present on reference node (%s)",
2324 assert base.oslist[os_name], "Base node has empty OS status?"
2325 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2327 # base OS is invalid, skipping
2329 for kind, a, b in [("API version", f_api, b_api),
2330 ("variants list", f_var, b_var),
2331 ("parameters", beautify_params(f_param),
2332 beautify_params(b_param))]:
2333 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2334 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2335 kind, os_name, base.name,
2336 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2338 # check any missing OSes
2339 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2340 _ErrorIf(missing, constants.CV_ENODEOS, node,
2341 "OSes present on reference node %s but missing on this node: %s",
2342 base.name, utils.CommaJoin(missing))
2344 def _VerifyOob(self, ninfo, nresult):
2345 """Verifies out of band functionality of a node.
2347 @type ninfo: L{objects.Node}
2348 @param ninfo: the node to check
2349 @param nresult: the remote results for the node
2353 # We just have to verify the paths on master and/or master candidates
2354 # as the oob helper is invoked on the master
2355 if ((ninfo.master_candidate or ninfo.master_capable) and
2356 constants.NV_OOB_PATHS in nresult):
2357 for path_result in nresult[constants.NV_OOB_PATHS]:
2358 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2360 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2361 """Verifies and updates the node volume data.
2363 This function will update a L{NodeImage}'s internal structures
2364 with data from the remote call.
2366 @type ninfo: L{objects.Node}
2367 @param ninfo: the node to check
2368 @param nresult: the remote results for the node
2369 @param nimg: the node image object
2370 @param vg_name: the configured VG name
2374 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2376 nimg.lvm_fail = True
2377 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2380 elif isinstance(lvdata, basestring):
2381 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2382 utils.SafeEncode(lvdata))
2383 elif not isinstance(lvdata, dict):
2384 _ErrorIf(True, constants.CV_ENODELVM, node,
2385 "rpc call to node failed (lvlist)")
2387 nimg.volumes = lvdata
2388 nimg.lvm_fail = False
2390 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2391 """Verifies and updates the node instance list.
2393 If the listing was successful, then updates this node's instance
2394 list. Otherwise, it marks the RPC call as failed for the instance
2397 @type ninfo: L{objects.Node}
2398 @param ninfo: the node to check
2399 @param nresult: the remote results for the node
2400 @param nimg: the node image object
2403 idata = nresult.get(constants.NV_INSTANCELIST, None)
2404 test = not isinstance(idata, list)
2405 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2406 "rpc call to node failed (instancelist): %s",
2407 utils.SafeEncode(str(idata)))
2409 nimg.hyp_fail = True
2411 nimg.instances = idata
2413 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2414 """Verifies and computes a node information map
2416 @type ninfo: L{objects.Node}
2417 @param ninfo: the node to check
2418 @param nresult: the remote results for the node
2419 @param nimg: the node image object
2420 @param vg_name: the configured VG name
2424 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2426 # try to read free memory (from the hypervisor)
2427 hv_info = nresult.get(constants.NV_HVINFO, None)
2428 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2429 _ErrorIf(test, constants.CV_ENODEHV, node,
2430 "rpc call to node failed (hvinfo)")
2433 nimg.mfree = int(hv_info["memory_free"])
2434 except (ValueError, TypeError):
2435 _ErrorIf(True, constants.CV_ENODERPC, node,
2436 "node returned invalid nodeinfo, check hypervisor")
2438 # FIXME: devise a free space model for file based instances as well
2439 if vg_name is not None:
2440 test = (constants.NV_VGLIST not in nresult or
2441 vg_name not in nresult[constants.NV_VGLIST])
2442 _ErrorIf(test, constants.CV_ENODELVM, node,
2443 "node didn't return data for the volume group '%s'"
2444 " - it is either missing or broken", vg_name)
2447 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2448 except (ValueError, TypeError):
2449 _ErrorIf(True, constants.CV_ENODERPC, node,
2450 "node returned invalid LVM info, check LVM status")
2452 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2453 """Gets per-disk status information for all instances.
2455 @type nodelist: list of strings
2456 @param nodelist: Node names
2457 @type node_image: dict of (name, L{objects.Node})
2458 @param node_image: Node objects
2459 @type instanceinfo: dict of (name, L{objects.Instance})
2460 @param instanceinfo: Instance objects
2461 @rtype: {instance: {node: [(success, payload)]}}
2462 @return: a dictionary of per-instance dictionaries with nodes as
2463 keys and disk information as values; the disk information is a
2464 list of tuples (success, payload)
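# Illustrative (assumed) example of the structure returned:
#   {"inst1.example.com": {"node1.example.com": [(True, <status>),
#                                                (False, "node offline")]}}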
2467 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2470 node_disks_devonly = {}
2471 diskless_instances = set()
2472 diskless = constants.DT_DISKLESS
2474 for nname in nodelist:
2475 node_instances = list(itertools.chain(node_image[nname].pinst,
2476 node_image[nname].sinst))
2477 diskless_instances.update(inst for inst in node_instances
2478 if instanceinfo[inst].disk_template == diskless)
2479 disks = [(inst, disk)
2480 for inst in node_instances
2481 for disk in instanceinfo[inst].disks]
2484 # No need to collect data
2487 node_disks[nname] = disks
2489 # Creating copies as SetDiskID below will modify the objects and that can
2490 # lead to incorrect data returned from nodes
2491 devonly = [dev.Copy() for (_, dev) in disks]
2494 self.cfg.SetDiskID(dev, nname)
2496 node_disks_devonly[nname] = devonly
2498 assert len(node_disks) == len(node_disks_devonly)
2500 # Collect data from all nodes with disks
2501 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2504 assert len(result) == len(node_disks)
2508 for (nname, nres) in result.items():
2509 disks = node_disks[nname]
2512 # No data from this node
2513 data = len(disks) * [(False, "node offline")]
2516 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2517 "while getting disk information: %s", msg)
2519 # No data from this node
2520 data = len(disks) * [(False, msg)]
2523 for idx, i in enumerate(nres.payload):
2524 if isinstance(i, (tuple, list)) and len(i) == 2:
2527 logging.warning("Invalid result from node %s, entry %d: %s",
2529 data.append((False, "Invalid result from the remote node"))
2531 for ((inst, _), status) in zip(disks, data):
2532 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2534 # Add empty entries for diskless instances.
2535 for inst in diskless_instances:
2536 assert inst not in instdisk
2539 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2540 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2541 compat.all(isinstance(s, (tuple, list)) and
2542 len(s) == 2 for s in statuses)
2543 for inst, nnames in instdisk.items()
2544 for nname, statuses in nnames.items())
2545 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2550 def _SshNodeSelector(group_uuid, all_nodes):
2551 """Create endless iterators for all potential SSH check hosts.
2554 nodes = [node for node in all_nodes
2555 if (node.group != group_uuid and
2557 keyfunc = operator.attrgetter("group")
2559 return map(itertools.cycle,
2560 [sorted(map(operator.attrgetter("name"), names))
2561 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2565 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2566 """Choose which nodes should talk to which other nodes.
2568 We will make nodes contact all nodes in their group, and one node from
2569 every other group.
2571 @warning: This algorithm has a known issue if one node group is much
2572 smaller than others (e.g. just one node). In such a case all other
2573 nodes will talk to the single node.
2576 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2577 sel = cls._SshNodeSelector(group_uuid, all_nodes)
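# Rough shape of the value returned below (illustrative only):
#   (["node1", "node2"], {"node1": ["peerA", "peerB"], ...})
# i.e. the group's online nodes plus, per node, one peer from each other
# group picked from the round-robin iterators of _SshNodeSelector.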
2579 return (online_nodes,
2580 dict((name, sorted([i.next() for i in sel]))
2581 for name in online_nodes))
2583 def BuildHooksEnv(self):
2586 Cluster-Verify hooks are only run in the post phase; if they fail, their
2587 output is logged in the verify output and the verification fails.
2591 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2594 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2595 for node in self.my_node_info.values())
2599 def BuildHooksNodes(self):
2600 """Build hooks nodes.
2603 return ([], self.my_node_names)
2605 def Exec(self, feedback_fn):
2606 """Verify integrity of the node group, performing various test on nodes.
2609 # This method has too many local variables. pylint: disable=R0914
2610 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2612 if not self.my_node_names:
2614 feedback_fn("* Empty node group, skipping verification")
2618 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 verbose = self.op.verbose
2620 self._feedback_fn = feedback_fn
2622 vg_name = self.cfg.GetVGName()
2623 drbd_helper = self.cfg.GetDRBDHelper()
2624 cluster = self.cfg.GetClusterInfo()
2625 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2626 hypervisors = cluster.enabled_hypervisors
2627 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2629 i_non_redundant = [] # Non redundant instances
2630 i_non_a_balanced = [] # Non auto-balanced instances
2631 n_offline = 0 # Count of offline nodes
2632 n_drained = 0 # Count of nodes being drained
2633 node_vol_should = {}
2635 # FIXME: verify OS list
2638 filemap = _ComputeAncillaryFiles(cluster, False)
2640 # do local checksums
2641 master_node = self.master_node = self.cfg.GetMasterNode()
2642 master_ip = self.cfg.GetMasterIP()
2644 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2646 node_verify_param = {
2647 constants.NV_FILELIST:
2648 utils.UniqueSequence(filename
2649 for files in filemap
2650 for filename in files),
2651 constants.NV_NODELIST:
2652 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2653 self.all_node_info.values()),
2654 constants.NV_HYPERVISOR: hypervisors,
2655 constants.NV_HVPARAMS:
2656 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2657 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2658 for node in node_data_list
2659 if not node.offline],
2660 constants.NV_INSTANCELIST: hypervisors,
2661 constants.NV_VERSION: None,
2662 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2663 constants.NV_NODESETUP: None,
2664 constants.NV_TIME: None,
2665 constants.NV_MASTERIP: (master_node, master_ip),
2666 constants.NV_OSLIST: None,
2667 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2670 if vg_name is not None:
2671 node_verify_param[constants.NV_VGLIST] = None
2672 node_verify_param[constants.NV_LVLIST] = vg_name
2673 node_verify_param[constants.NV_PVLIST] = [vg_name]
2674 node_verify_param[constants.NV_DRBDLIST] = None
2677 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2680 # FIXME: this needs to be changed per node-group, not cluster-wide
2682 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2683 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2684 bridges.add(default_nicpp[constants.NIC_LINK])
2685 for instance in self.my_inst_info.values():
2686 for nic in instance.nics:
2687 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2688 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2689 bridges.add(full_nic[constants.NIC_LINK])
2692 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2694 # Build our expected cluster state
2695 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2697 vm_capable=node.vm_capable))
2698 for node in node_data_list)
2702 for node in self.all_node_info.values():
2703 path = _SupportsOob(self.cfg, node)
2704 if path and path not in oob_paths:
2705 oob_paths.append(path)
2708 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2710 for instance in self.my_inst_names:
2711 inst_config = self.my_inst_info[instance]
2713 for nname in inst_config.all_nodes:
2714 if nname not in node_image:
2715 gnode = self.NodeImage(name=nname)
2716 gnode.ghost = (nname not in self.all_node_info)
2717 node_image[nname] = gnode
2719 inst_config.MapLVsByNode(node_vol_should)
2721 pnode = inst_config.primary_node
2722 node_image[pnode].pinst.append(instance)
2724 for snode in inst_config.secondary_nodes:
2725 nimg = node_image[snode]
2726 nimg.sinst.append(instance)
2727 if pnode not in nimg.sbp:
2728 nimg.sbp[pnode] = []
2729 nimg.sbp[pnode].append(instance)
2731 # At this point, we have the in-memory data structures complete,
2732 # except for the runtime information, which we'll gather next
2734 # Due to the way our RPC system works, exact response times cannot be
2735 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2736 # time before and after executing the request, we can at least have a time
2738 nvinfo_starttime = time.time()
2739 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2741 self.cfg.GetClusterName())
2742 nvinfo_endtime = time.time()
2744 if self.extra_lv_nodes and vg_name is not None:
2746 self.rpc.call_node_verify(self.extra_lv_nodes,
2747 {constants.NV_LVLIST: vg_name},
2748 self.cfg.GetClusterName())
2750 extra_lv_nvinfo = {}
2752 all_drbd_map = self.cfg.ComputeDRBDMap()
2754 feedback_fn("* Gathering disk information (%s nodes)" %
2755 len(self.my_node_names))
2756 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2759 feedback_fn("* Verifying configuration file consistency")
2761 # If not all nodes are being checked, we need to make sure the master node
2762 # and a non-checked vm_capable node are in the list.
2763 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2765 vf_nvinfo = all_nvinfo.copy()
2766 vf_node_info = list(self.my_node_info.values())
2767 additional_nodes = []
2768 if master_node not in self.my_node_info:
2769 additional_nodes.append(master_node)
2770 vf_node_info.append(self.all_node_info[master_node])
2771 # Add the first vm_capable node we find which is not included
2772 for node in absent_nodes:
2773 nodeinfo = self.all_node_info[node]
2774 if nodeinfo.vm_capable and not nodeinfo.offline:
2775 additional_nodes.append(node)
2776 vf_node_info.append(self.all_node_info[node])
2778 key = constants.NV_FILELIST
2779 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2780 {key: node_verify_param[key]},
2781 self.cfg.GetClusterName()))
2783 vf_nvinfo = all_nvinfo
2784 vf_node_info = self.my_node_info.values()
2786 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2788 feedback_fn("* Verifying node status")
2792 for node_i in node_data_list:
2794 nimg = node_image[node]
2798 feedback_fn("* Skipping offline node %s" % (node,))
2802 if node == master_node:
2804 elif node_i.master_candidate:
2805 ntype = "master candidate"
2806 elif node_i.drained:
2812 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2814 msg = all_nvinfo[node].fail_msg
2815 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2818 nimg.rpc_fail = True
2821 nresult = all_nvinfo[node].payload
2823 nimg.call_ok = self._VerifyNode(node_i, nresult)
2824 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2825 self._VerifyNodeNetwork(node_i, nresult)
2826 self._VerifyOob(node_i, nresult)
2829 self._VerifyNodeLVM(node_i, nresult, vg_name)
2830 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2833 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2834 self._UpdateNodeInstances(node_i, nresult, nimg)
2835 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2836 self._UpdateNodeOS(node_i, nresult, nimg)
2838 if not nimg.os_fail:
2839 if refos_img is None:
2841 self._VerifyNodeOS(node_i, nimg, refos_img)
2842 self._VerifyNodeBridges(node_i, nresult, bridges)
2844 # Check whether all running instances are primary for the node. (This
2845 # can no longer be done from _VerifyInstance below, since some of the
2846 # wrong instances could be from other node groups.)
2847 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2849 for inst in non_primary_inst:
2850 test = inst in self.all_inst_info
2851 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2852 "instance should not run on node %s", node_i.name)
2853 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2854 "node is running unknown instance %s", inst)
2856 for node, result in extra_lv_nvinfo.items():
2857 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2858 node_image[node], vg_name)
2860 feedback_fn("* Verifying instance status")
2861 for instance in self.my_inst_names:
2863 feedback_fn("* Verifying instance %s" % instance)
2864 inst_config = self.my_inst_info[instance]
2865 self._VerifyInstance(instance, inst_config, node_image,
2867 inst_nodes_offline = []
2869 pnode = inst_config.primary_node
2870 pnode_img = node_image[pnode]
2871 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2872 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2873 " primary node failed", instance)
2875 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2876 constants.CV_EINSTANCEBADNODE, instance,
2877 "instance is marked as running and lives on offline node %s",
2878 inst_config.primary_node)
2880 # If the instance is non-redundant we cannot survive losing its primary
2881 # node, so we are not N+1 compliant. On the other hand we have no disk
2882 # templates with more than one secondary so that situation is not well
2884 # FIXME: does not support file-backed instances
2885 if not inst_config.secondary_nodes:
2886 i_non_redundant.append(instance)
2888 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2889 constants.CV_EINSTANCELAYOUT,
2890 instance, "instance has multiple secondary nodes: %s",
2891 utils.CommaJoin(inst_config.secondary_nodes),
2892 code=self.ETYPE_WARNING)
2894 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2895 pnode = inst_config.primary_node
2896 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2897 instance_groups = {}
2899 for node in instance_nodes:
2900 instance_groups.setdefault(self.all_node_info[node].group,
2904 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2905 # Sort so that we always list the primary node first.
2906 for group, nodes in sorted(instance_groups.items(),
2907 key=lambda (_, nodes): pnode in nodes,
2910 self._ErrorIf(len(instance_groups) > 1,
2911 constants.CV_EINSTANCESPLITGROUPS,
2912 instance, "instance has primary and secondary nodes in"
2913 " different groups: %s", utils.CommaJoin(pretty_list),
2914 code=self.ETYPE_WARNING)
2916 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2917 i_non_a_balanced.append(instance)
2919 for snode in inst_config.secondary_nodes:
2920 s_img = node_image[snode]
2921 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2922 snode, "instance %s, connection to secondary node failed",
2926 inst_nodes_offline.append(snode)
2928 # warn that the instance lives on offline nodes
2929 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2930 "instance has offline secondary node(s) %s",
2931 utils.CommaJoin(inst_nodes_offline))
2932 # ... or ghost/non-vm_capable nodes
2933 for node in inst_config.all_nodes:
2934 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2935 instance, "instance lives on ghost node %s", node)
2936 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2937 instance, "instance lives on non-vm_capable node %s", node)
2939 feedback_fn("* Verifying orphan volumes")
2940 reserved = utils.FieldSet(*cluster.reserved_lvs)
2942 # We will get spurious "unknown volume" warnings if any node of this group
2943 # is secondary for an instance whose primary is in another group. To avoid
2944 # them, we find these instances and add their volumes to node_vol_should.
2945 for inst in self.all_inst_info.values():
2946 for secondary in inst.secondary_nodes:
2947 if (secondary in self.my_node_info
2948 and inst.name not in self.my_inst_info):
2949 inst.MapLVsByNode(node_vol_should)
2952 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2954 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2955 feedback_fn("* Verifying N+1 Memory redundancy")
2956 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2958 feedback_fn("* Other Notes")
2960 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2961 % len(i_non_redundant))
2963 if i_non_a_balanced:
2964 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2965 % len(i_non_a_balanced))
2968 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2971 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2975 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2976 """Analyze the post-hooks' result
2978 This method analyses the hook result, handles it, and sends some
2979 nicely-formatted feedback back to the user.
2981 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2982 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2983 @param hooks_results: the results of the multi-node hooks rpc call
2984 @param feedback_fn: function used to send feedback back to the caller
2985 @param lu_result: previous Exec result
2986 @return: the new Exec result, based on the previous result
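# Each hooks_results entry is (assumed) an RPC result whose payload is a
# list of (script, status, output) tuples; the loop below flags entries
# whose status is constants.HKR_FAIL.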
2990 # We only really run POST phase hooks, only for non-empty groups,
2991 # and are only interested in their results
2992 if not self.my_node_names:
2995 elif phase == constants.HOOKS_PHASE_POST:
2996 # Used to change hooks' output to proper indentation
2997 feedback_fn("* Hooks Results")
2998 assert hooks_results, "invalid result from hooks"
3000 for node_name in hooks_results:
3001 res = hooks_results[node_name]
3003 test = msg and not res.offline
3004 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3005 "Communication failure in hooks execution: %s", msg)
3006 if res.offline or msg:
3007 # No need to investigate payload if node is offline or gave
3010 for script, hkr, output in res.payload:
3011 test = hkr == constants.HKR_FAIL
3012 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3013 "Script %s failed, output:", script)
3015 output = self._HOOKS_INDENT_RE.sub(" ", output)
3016 feedback_fn("%s" % output)
3022 class LUClusterVerifyDisks(NoHooksLU):
3023 """Verifies the cluster disks status.
3028 def ExpandNames(self):
3029 self.share_locks = _ShareAll()
3030 self.needed_locks = {
3031 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3034 def Exec(self, feedback_fn):
3035 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3037 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3038 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3039 for group in group_names])
3042 class LUGroupVerifyDisks(NoHooksLU):
3043 """Verifies the status of all disks in a node group.
3048 def ExpandNames(self):
3049 # Raises errors.OpPrereqError on its own if group can't be found
3050 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3052 self.share_locks = _ShareAll()
3053 self.needed_locks = {
3054 locking.LEVEL_INSTANCE: [],
3055 locking.LEVEL_NODEGROUP: [],
3056 locking.LEVEL_NODE: [],
3059 def DeclareLocks(self, level):
3060 if level == locking.LEVEL_INSTANCE:
3061 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3063 # Lock instances optimistically, needs verification once node and group
3064 # locks have been acquired
3065 self.needed_locks[locking.LEVEL_INSTANCE] = \
3066 self.cfg.GetNodeGroupInstances(self.group_uuid)
3068 elif level == locking.LEVEL_NODEGROUP:
3069 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3071 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3072 set([self.group_uuid] +
3073 # Lock all groups used by instances optimistically; this requires
3074 # going via the node before it's locked, requiring verification
3077 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3078 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3080 elif level == locking.LEVEL_NODE:
3081 # This will only lock the nodes in the group to be verified which contain
3083 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3084 self._LockInstancesNodes()
3086 # Lock all nodes in group to be verified
3087 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3088 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3089 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3091 def CheckPrereq(self):
3092 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3093 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3094 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3096 assert self.group_uuid in owned_groups
3098 # Check if locked instances are still correct
3099 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3101 # Get instance information
3102 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3104 # Check if node groups for locked instances are still correct
3105 for (instance_name, inst) in self.instances.items():
3106 assert owned_nodes.issuperset(inst.all_nodes), \
3107 "Instance %s's nodes changed while we kept the lock" % instance_name
3109 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3112 assert self.group_uuid in inst_groups, \
3113 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3115 def Exec(self, feedback_fn):
3116 """Verify integrity of cluster disks.
3118 @rtype: tuple of three items
3119 @return: a tuple of (dict of node-to-node_error, list of instances
3120 which need activate-disks, dict of instance: (node, volume) for
3125 res_instances = set()
3128 nv_dict = _MapInstanceDisksToNodes([inst
3129 for inst in self.instances.values()
3133 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3134 set(self.cfg.GetVmCapableNodeList()))
3136 node_lvs = self.rpc.call_lv_list(nodes, [])
3138 for (node, node_res) in node_lvs.items():
3139 if node_res.offline:
3142 msg = node_res.fail_msg
3144 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3145 res_nodes[node] = msg
3148 for lv_name, (_, _, lv_online) in node_res.payload.items():
3149 inst = nv_dict.pop((node, lv_name), None)
3150 if not (lv_online or inst is None):
3151 res_instances.add(inst)
3153 # any leftover items in nv_dict are missing LVs, let's arrange the data
3155 for key, inst in nv_dict.iteritems():
3156 res_missing.setdefault(inst, []).append(key)
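# Illustrative (assumed) example of the tuple returned below:
#   ({"node3": "rpc error ..."},             # nodes that could not be queried
#    ["inst2"],                              # instances needing activate-disks
#    {"inst5": [("node1", "xenvg/disk0")]})  # instances with missing LVs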
3158 return (res_nodes, list(res_instances), res_missing)
3161 class LUClusterRepairDiskSizes(NoHooksLU):
3162 """Verifies the cluster disks sizes.
3167 def ExpandNames(self):
3168 if self.op.instances:
3169 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3170 self.needed_locks = {
3171 locking.LEVEL_NODE: [],
3172 locking.LEVEL_INSTANCE: self.wanted_names,
3174 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3176 self.wanted_names = None
3177 self.needed_locks = {
3178 locking.LEVEL_NODE: locking.ALL_SET,
3179 locking.LEVEL_INSTANCE: locking.ALL_SET,
3181 self.share_locks = _ShareAll()
3183 def DeclareLocks(self, level):
3184 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3185 self._LockInstancesNodes(primary_only=True)
3187 def CheckPrereq(self):
3188 """Check prerequisites.
3190 This only checks the optional instance list against the existing names.
3193 if self.wanted_names is None:
3194 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3196 self.wanted_instances = \
3197 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3199 def _EnsureChildSizes(self, disk):
3200 """Ensure children of the disk have the needed disk size.
3202 This is valid mainly for DRBD8 and fixes an issue where the
3203 children have a smaller disk size.
3205 @param disk: an L{ganeti.objects.Disk} object
3208 if disk.dev_type == constants.LD_DRBD8:
3209 assert disk.children, "Empty children for DRBD8?"
3210 fchild = disk.children[0]
3211 mismatch = fchild.size < disk.size
3213 self.LogInfo("Child disk has size %d, parent %d, fixing",
3214 fchild.size, disk.size)
3215 fchild.size = disk.size
3217 # and we recurse on this child only, not on the metadev
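# Illustrative example (invented sizes): a DRBD8 disk recorded as 10240 MiB
# whose data child is only 10200 MiB gets the child's configured size bumped
# to 10240 MiB and True is returned so the caller knows to save the config.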
3218 return self._EnsureChildSizes(fchild) or mismatch
3222 def Exec(self, feedback_fn):
3223 """Verify the size of cluster disks.
3226 # TODO: check child disks too
3227 # TODO: check differences in size between primary/secondary nodes
3229 for instance in self.wanted_instances:
3230 pnode = instance.primary_node
3231 if pnode not in per_node_disks:
3232 per_node_disks[pnode] = []
3233 for idx, disk in enumerate(instance.disks):
3234 per_node_disks[pnode].append((instance, idx, disk))
3237 for node, dskl in per_node_disks.items():
3238 newl = [v[2].Copy() for v in dskl]
3240 self.cfg.SetDiskID(dsk, node)
3241 result = self.rpc.call_blockdev_getsize(node, newl)
3243 self.LogWarning("Failure in blockdev_getsize call to node"
3244 " %s, ignoring", node)
3246 if len(result.payload) != len(dskl):
3247 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3248 " result.payload=%s", node, len(dskl), result.payload)
3249 self.LogWarning("Invalid result from node %s, ignoring node results",
3252 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3254 self.LogWarning("Disk %d of instance %s did not return size"
3255 " information, ignoring", idx, instance.name)
3257 if not isinstance(size, (int, long)):
3258 self.LogWarning("Disk %d of instance %s did not return valid"
3259 " size information, ignoring", idx, instance.name)
3262 if size != disk.size:
3263 self.LogInfo("Disk %d of instance %s has mismatched size,"
3264 " correcting: recorded %d, actual %d", idx,
3265 instance.name, disk.size, size)
3267 self.cfg.Update(instance, feedback_fn)
3268 changed.append((instance.name, idx, size))
3269 if self._EnsureChildSizes(disk):
3270 self.cfg.Update(instance, feedback_fn)
3271 changed.append((instance.name, idx, disk.size))
3275 class LUClusterRename(LogicalUnit):
3276 """Rename the cluster.
3279 HPATH = "cluster-rename"
3280 HTYPE = constants.HTYPE_CLUSTER
3282 def BuildHooksEnv(self):
3287 "OP_TARGET": self.cfg.GetClusterName(),
3288 "NEW_NAME": self.op.name,
3291 def BuildHooksNodes(self):
3292 """Build hooks nodes.
3295 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3297 def CheckPrereq(self):
3298 """Verify that the passed name is a valid one.
3301 hostname = netutils.GetHostname(name=self.op.name,
3302 family=self.cfg.GetPrimaryIPFamily())
3304 new_name = hostname.name
3305 self.ip = new_ip = hostname.ip
3306 old_name = self.cfg.GetClusterName()
3307 old_ip = self.cfg.GetMasterIP()
3308 if new_name == old_name and new_ip == old_ip:
3309 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3310 " cluster has changed",
3312 if new_ip != old_ip:
3313 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3314 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3315 " reachable on the network" %
3316 new_ip, errors.ECODE_NOTUNIQUE)
3318 self.op.name = new_name
3320 def Exec(self, feedback_fn):
3321 """Rename the cluster.
3324 clustername = self.op.name
3327 # shutdown the master IP
3328 master = self.cfg.GetMasterNode()
3329 result = self.rpc.call_node_deactivate_master_ip(master)
3330 result.Raise("Could not disable the master role")
3333 cluster = self.cfg.GetClusterInfo()
3334 cluster.cluster_name = clustername
3335 cluster.master_ip = ip
3336 self.cfg.Update(cluster, feedback_fn)
3338 # update the known hosts file
3339 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3340 node_list = self.cfg.GetOnlineNodeList()
3342 node_list.remove(master)
3345 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3347 result = self.rpc.call_node_activate_master_ip(master)
3348 msg = result.fail_msg
3350 self.LogWarning("Could not re-enable the master role on"
3351 " the master, please restart manually: %s", msg)
3356 def _ValidateNetmask(cfg, netmask):
3357 """Checks if a netmask is valid.
3359 @type cfg: L{config.ConfigWriter}
3360 @param cfg: The cluster configuration
3362 @param netmask: the netmask to be verified
3363 @raise errors.OpPrereqError: if the validation fails
3366 ip_family = cfg.GetPrimaryIPFamily()
3368 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3369 except errors.ProgrammerError:
3370 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3372 if not ipcls.ValidateNetmask(netmask):
3373 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3377 class LUClusterSetParams(LogicalUnit):
3378 """Change the parameters of the cluster.
3381 HPATH = "cluster-modify"
3382 HTYPE = constants.HTYPE_CLUSTER
3385 def CheckArguments(self):
3389 if self.op.uid_pool:
3390 uidpool.CheckUidPool(self.op.uid_pool)
3392 if self.op.add_uids:
3393 uidpool.CheckUidPool(self.op.add_uids)
3395 if self.op.remove_uids:
3396 uidpool.CheckUidPool(self.op.remove_uids)
3398 if self.op.master_netmask is not None:
3399 _ValidateNetmask(self.cfg, self.op.master_netmask)
3401 def ExpandNames(self):
3402 # FIXME: in the future maybe other cluster params won't require checking on
3403 # all nodes to be modified.
3404 self.needed_locks = {
3405 locking.LEVEL_NODE: locking.ALL_SET,
3407 self.share_locks[locking.LEVEL_NODE] = 1
3409 def BuildHooksEnv(self):
3414 "OP_TARGET": self.cfg.GetClusterName(),
3415 "NEW_VG_NAME": self.op.vg_name,
3418 def BuildHooksNodes(self):
3419 """Build hooks nodes.
3422 mn = self.cfg.GetMasterNode()
3425 def CheckPrereq(self):
3426 """Check prerequisites.
3428 This checks that the given parameters do not conflict with each other and
3429 that the given volume group is valid.
3432 if self.op.vg_name is not None and not self.op.vg_name:
3433 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3434 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3435 " instances exist", errors.ECODE_INVAL)
3437 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3438 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3439 raise errors.OpPrereqError("Cannot disable drbd helper while"
3440 " drbd-based instances exist",
3443 node_list = self.owned_locks(locking.LEVEL_NODE)
3445 # if vg_name not None, checks given volume group on all nodes
3447 vglist = self.rpc.call_vg_list(node_list)
3448 for node in node_list:
3449 msg = vglist[node].fail_msg
3451 # ignoring down node
3452 self.LogWarning("Error while gathering data on node %s"
3453 " (ignoring node): %s", node, msg)
3455 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3457 constants.MIN_VG_SIZE)
3459 raise errors.OpPrereqError("Error on node '%s': %s" %
3460 (node, vgstatus), errors.ECODE_ENVIRON)
3462 if self.op.drbd_helper:
3463 # checks given drbd helper on all nodes
3464 helpers = self.rpc.call_drbd_helper(node_list)
3465 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3467 self.LogInfo("Not checking drbd helper on offline node %s", node)
3469 msg = helpers[node].fail_msg
3471 raise errors.OpPrereqError("Error checking drbd helper on node"
3472 " '%s': %s" % (node, msg),
3473 errors.ECODE_ENVIRON)
3474 node_helper = helpers[node].payload
3475 if node_helper != self.op.drbd_helper:
3476 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3477 (node, node_helper), errors.ECODE_ENVIRON)
3479 self.cluster = cluster = self.cfg.GetClusterInfo()
3480 # validate params changes
3481 if self.op.beparams:
3482 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3483 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3485 if self.op.ndparams:
3486 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3487 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3489 # TODO: we need a more general way to handle resetting
3490 # cluster-level parameters to default values
3491 if self.new_ndparams["oob_program"] == "":
3492 self.new_ndparams["oob_program"] = \
3493 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3495 if self.op.nicparams:
3496 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3497 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3498 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3501 # check all instances for consistency
3502 for instance in self.cfg.GetAllInstancesInfo().values():
3503 for nic_idx, nic in enumerate(instance.nics):
3504 params_copy = copy.deepcopy(nic.nicparams)
3505 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3507 # check parameter syntax
3509 objects.NIC.CheckParameterSyntax(params_filled)
3510 except errors.ConfigurationError, err:
3511 nic_errors.append("Instance %s, nic/%d: %s" %
3512 (instance.name, nic_idx, err))
3514 # if we're moving instances to routed, check that they have an ip
3515 target_mode = params_filled[constants.NIC_MODE]
3516 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3517 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3518 " address" % (instance.name, nic_idx))
3520 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3521 "\n".join(nic_errors))
3523 # hypervisor list/parameters
3524 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3525 if self.op.hvparams:
3526 for hv_name, hv_dict in self.op.hvparams.items():
3527 if hv_name not in self.new_hvparams:
3528 self.new_hvparams[hv_name] = hv_dict
3530 self.new_hvparams[hv_name].update(hv_dict)
3532 # os hypervisor parameters
3533 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3535 for os_name, hvs in self.op.os_hvp.items():
3536 if os_name not in self.new_os_hvp:
3537 self.new_os_hvp[os_name] = hvs
3539 for hv_name, hv_dict in hvs.items():
3540 if hv_name not in self.new_os_hvp[os_name]:
3541 self.new_os_hvp[os_name][hv_name] = hv_dict
3543 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3546 self.new_osp = objects.FillDict(cluster.osparams, {})
3547 if self.op.osparams:
3548 for os_name, osp in self.op.osparams.items():
3549 if os_name not in self.new_osp:
3550 self.new_osp[os_name] = {}
3552 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3555 if not self.new_osp[os_name]:
3556 # we removed all parameters
3557 del self.new_osp[os_name]
3559 # check the parameter validity (remote check)
3560 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3561 os_name, self.new_osp[os_name])
3563 # changes to the hypervisor list
3564 if self.op.enabled_hypervisors is not None:
3565 self.hv_list = self.op.enabled_hypervisors
3566 for hv in self.hv_list:
3567 # if the hypervisor doesn't already exist in the cluster
3568 # hvparams, we initialize it to empty, and then (in both
3569 # cases) we make sure to fill the defaults, as we might not
3570 # have a complete defaults list if the hypervisor wasn't
3572 if hv not in new_hvp:
3574 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3575 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3577 self.hv_list = cluster.enabled_hypervisors
3579 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3580 # either the enabled list has changed, or the parameters have, validate
3581 for hv_name, hv_params in self.new_hvparams.items():
3582 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3583 (self.op.enabled_hypervisors and
3584 hv_name in self.op.enabled_hypervisors)):
3585 # either this is a new hypervisor, or its parameters have changed
3586 hv_class = hypervisor.GetHypervisor(hv_name)
3587 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3588 hv_class.CheckParameterSyntax(hv_params)
3589 _CheckHVParams(self, node_list, hv_name, hv_params)
3592 # no need to check any newly-enabled hypervisors, since the
3593 # defaults have already been checked in the above code-block
3594 for os_name, os_hvp in self.new_os_hvp.items():
3595 for hv_name, hv_params in os_hvp.items():
3596 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3597 # we need to fill in the new os_hvp on top of the actual hv_p
3598 cluster_defaults = self.new_hvparams.get(hv_name, {})
3599 new_osp = objects.FillDict(cluster_defaults, hv_params)
3600 hv_class = hypervisor.GetHypervisor(hv_name)
3601 hv_class.CheckParameterSyntax(new_osp)
3602 _CheckHVParams(self, node_list, hv_name, new_osp)
3604 if self.op.default_iallocator:
3605 alloc_script = utils.FindFile(self.op.default_iallocator,
3606 constants.IALLOCATOR_SEARCH_PATH,
3608 if alloc_script is None:
3609 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3610 " specified" % self.op.default_iallocator,
3613 def Exec(self, feedback_fn):
3614 """Change the parameters of the cluster.
3617 if self.op.vg_name is not None:
3618 new_volume = self.op.vg_name
3621 if new_volume != self.cfg.GetVGName():
3622 self.cfg.SetVGName(new_volume)
3624 feedback_fn("Cluster LVM configuration already in desired"
3625 " state, not changing")
3626 if self.op.drbd_helper is not None:
3627 new_helper = self.op.drbd_helper
3630 if new_helper != self.cfg.GetDRBDHelper():
3631 self.cfg.SetDRBDHelper(new_helper)
3633 feedback_fn("Cluster DRBD helper already in desired state,"
3635 if self.op.hvparams:
3636 self.cluster.hvparams = self.new_hvparams
3638 self.cluster.os_hvp = self.new_os_hvp
3639 if self.op.enabled_hypervisors is not None:
3640 self.cluster.hvparams = self.new_hvparams
3641 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3642 if self.op.beparams:
3643 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3644 if self.op.nicparams:
3645 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3646 if self.op.osparams:
3647 self.cluster.osparams = self.new_osp
3648 if self.op.ndparams:
3649 self.cluster.ndparams = self.new_ndparams
3651 if self.op.candidate_pool_size is not None:
3652 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3653 # we need to update the pool size here, otherwise the save will fail
3654 _AdjustCandidatePool(self, [])
3656 if self.op.maintain_node_health is not None:
3657 self.cluster.maintain_node_health = self.op.maintain_node_health
3659 if self.op.prealloc_wipe_disks is not None:
3660 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3662 if self.op.add_uids is not None:
3663 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3665 if self.op.remove_uids is not None:
3666 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3668 if self.op.uid_pool is not None:
3669 self.cluster.uid_pool = self.op.uid_pool
3671 if self.op.default_iallocator is not None:
3672 self.cluster.default_iallocator = self.op.default_iallocator
3674 if self.op.reserved_lvs is not None:
3675 self.cluster.reserved_lvs = self.op.reserved_lvs
3677 def helper_os(aname, mods, desc):
3679 lst = getattr(self.cluster, aname)
3680 for key, val in mods:
3681 if key == constants.DDM_ADD:
3683 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3686 elif key == constants.DDM_REMOVE:
3690 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3692 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3694 if self.op.hidden_os:
3695 helper_os("hidden_os", self.op.hidden_os, "hidden")
3697 if self.op.blacklisted_os:
3698 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3700 if self.op.master_netdev:
3701 master = self.cfg.GetMasterNode()
3702 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3703 self.cluster.master_netdev)
3704 result = self.rpc.call_node_deactivate_master_ip(master)
3705 result.Raise("Could not disable the master ip")
3706 feedback_fn("Changing master_netdev from %s to %s" %
3707 (self.cluster.master_netdev, self.op.master_netdev))
3708 self.cluster.master_netdev = self.op.master_netdev
3710 if self.op.master_netmask:
3711 master = self.cfg.GetMasterNode()
3712 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3713 result = self.rpc.call_node_change_master_netmask(master,
3714 self.op.master_netmask)
3716 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3717 self.LogWarning(msg)
3720 self.cluster.master_netmask = self.op.master_netmask
3722 self.cfg.Update(self.cluster, feedback_fn)
3724 if self.op.master_netdev:
3725 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3726 self.op.master_netdev)
3727 result = self.rpc.call_node_activate_master_ip(master)
3729 self.LogWarning("Could not re-enable the master ip on"
3730 " the master, please restart manually: %s",
3734 def _UploadHelper(lu, nodes, fname):
3735 """Helper for uploading a file and showing warnings.
3738 if os.path.exists(fname):
3739 result = lu.rpc.call_upload_file(nodes, fname)
3740 for to_node, to_result in result.items():
3741 msg = to_result.fail_msg
3743 msg = ("Copy of file %s to node %s failed: %s" %
3744 (fname, to_node, msg))
3745 lu.proc.LogWarning(msg)
3748 def _ComputeAncillaryFiles(cluster, redist):
3749 """Compute files external to Ganeti which need to be consistent.
3751 @type redist: boolean
3752 @param redist: Whether to include files which need to be redistributed
3755 # Compute files for all nodes
3757 constants.SSH_KNOWN_HOSTS_FILE,
3758 constants.CONFD_HMAC_KEY,
3759 constants.CLUSTER_DOMAIN_SECRET_FILE,
3760 constants.SPICE_CERT_FILE,
3761 constants.SPICE_CACERT_FILE,
3762 constants.RAPI_USERS_FILE,
3766 files_all.update(constants.ALL_CERT_FILES)
3767 files_all.update(ssconf.SimpleStore().GetFileList())
3769 # we need to ship at least the RAPI certificate
3770 files_all.add(constants.RAPI_CERT_FILE)
3772 if cluster.modify_etc_hosts:
3773 files_all.add(constants.ETC_HOSTS)
3775 # Files which are optional; these must:
3776 # - be present in one other category as well
3777 # - either exist or not exist on all nodes of that category (mc, vm all)
3779 constants.RAPI_USERS_FILE,
3782 # Files which should only be on master candidates
3785 files_mc.add(constants.CLUSTER_CONF_FILE)
3787 # Files which should only be on VM-capable nodes
3788 files_vm = set(filename
3789 for hv_name in cluster.enabled_hypervisors
3790 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3792 files_opt |= set(filename
3793 for hv_name in cluster.enabled_hypervisors
3794 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3796 # Filenames in each category must be unique
3797 all_files_set = files_all | files_mc | files_vm
3798 assert (len(all_files_set) ==
3799 sum(map(len, [files_all, files_mc, files_vm]))), \
3800 "Found file listed in more than one file list"
3802 # Optional files must be present in one other category
3803 assert all_files_set.issuperset(files_opt), \
3804 "Optional file not in a different required list"
3806 return (files_all, files_opt, files_mc, files_vm)
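# Illustrative sketch (not part of Ganeti; the helper name is hypothetical):
# the two assertions above amount to plain set algebra over the returned
# categories.  Assuming ordinary Python sets, the same invariants could be
# stated as:
def _ExampleCheckFileCategories(files_all, files_opt, files_mc, files_vm):
  """Return True iff the ancillary file categories are mutually consistent."""
  combined = files_all | files_mc | files_vm
  # no file may be listed in more than one required category
  disjoint = len(combined) == len(files_all) + len(files_mc) + len(files_vm)
  # every optional file must also appear in one of the required categories
  optional_known = combined.issuperset(files_opt)
  return disjoint and optional_known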
3809 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3810 """Distribute additional files which are part of the cluster configuration.
3812 ConfigWriter takes care of distributing the config and ssconf files, but
3813 there are more files which should be distributed to all nodes. This function
3814 makes sure those are copied.
3816 @param lu: calling logical unit
3817 @param additional_nodes: list of nodes not in the config to distribute to
3818 @type additional_vm: boolean
3819 @param additional_vm: whether the additional nodes are vm-capable or not
3822 # Gather target nodes
3823 cluster = lu.cfg.GetClusterInfo()
3824 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3826 online_nodes = lu.cfg.GetOnlineNodeList()
3827 vm_nodes = lu.cfg.GetVmCapableNodeList()
3829 if additional_nodes is not None:
3830 online_nodes.extend(additional_nodes)
3832 vm_nodes.extend(additional_nodes)
3834 # Never distribute to master node
3835 for nodelist in [online_nodes, vm_nodes]:
3836 if master_info.name in nodelist:
3837 nodelist.remove(master_info.name)
3840 (files_all, _, files_mc, files_vm) = \
3841 _ComputeAncillaryFiles(cluster, True)
3843 # Never re-distribute configuration file from here
3844 assert not (constants.CLUSTER_CONF_FILE in files_all or
3845 constants.CLUSTER_CONF_FILE in files_vm)
3846 assert not files_mc, "Master candidates not handled in this function"
3849 (online_nodes, files_all),
3850 (vm_nodes, files_vm),
3854 for (node_list, files) in filemap:
3856 _UploadHelper(lu, node_list, fname)
3859 class LUClusterRedistConf(NoHooksLU):
3860 """Force the redistribution of cluster configuration.
3862 This is a very simple LU.
3867 def ExpandNames(self):
3868 self.needed_locks = {
3869 locking.LEVEL_NODE: locking.ALL_SET,
3871 self.share_locks[locking.LEVEL_NODE] = 1
3873 def Exec(self, feedback_fn):
3874 """Redistribute the configuration.
3877 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3878 _RedistributeAncillaryFiles(self)
3881 class LUClusterActivateMasterIp(NoHooksLU):
3882 """Activate the master IP on the master node.
3885 def Exec(self, feedback_fn):
3886 """Activate the master IP.
3889 master = self.cfg.GetMasterNode()
3890 self.rpc.call_node_activate_master_ip(master)
3893 class LUClusterDeactivateMasterIp(NoHooksLU):
3894 """Deactivate the master IP on the master node.
3897 def Exec(self, feedback_fn):
3898 """Deactivate the master IP.
3901 master = self.cfg.GetMasterNode()
3902 self.rpc.call_node_deactivate_master_ip(master)
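# Note: these two trivial LUs expose the same node_activate_master_ip /
# node_deactivate_master_ip RPCs that LUClusterSetParams.Exec above uses when
# switching master_netdev, so the master IP can also be toggled on its own.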
3905 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3906 """Sleep and poll for an instance's disk to sync.
3909 if not instance.disks or (disks is not None and not disks):
3912 disks = _ExpandCheckDisks(instance, disks)
3915 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3917 node = instance.primary_node
3920 lu.cfg.SetDiskID(dev, node)
3922 # TODO: Convert to utils.Retry
3925 degr_retries = 10 # in seconds, as we sleep 1 second each time
3929 cumul_degraded = False
3930 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3931 msg = rstats.fail_msg
3933 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3936 raise errors.RemoteError("Can't contact node %s for mirror data,"
3937 " aborting." % node)
3940 rstats = rstats.payload
3942 for i, mstat in enumerate(rstats):
3944 lu.LogWarning("Can't compute data for node %s/%s",
3945 node, disks[i].iv_name)
3948 cumul_degraded = (cumul_degraded or
3949 (mstat.is_degraded and mstat.sync_percent is None))
3950 if mstat.sync_percent is not None:
3952 if mstat.estimated_time is not None:
3953 rem_time = ("%s remaining (estimated)" %
3954 utils.FormatSeconds(mstat.estimated_time))
3955 max_time = mstat.estimated_time
3957 rem_time = "no time estimate"
3958 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3959 (disks[i].iv_name, mstat.sync_percent, rem_time))
3961 # if we're done but degraded, let's do a few small retries, to
3962 # make sure we see a stable and not transient situation; therefore
3963 # we force restart of the loop
3964 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3965 logging.info("Degraded disks found, %d retries left", degr_retries)
3973 time.sleep(min(60, max_time))
3976 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3977 return not cumul_degraded
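# Illustrative sketch (not part of Ganeti; the helper below is hypothetical
# and simplified): _WaitForSync above polls blockdev_getmirrorstatus, sleeps
# based on the reported sync time estimate capped at 60 seconds, and allows a
# handful of extra rounds when the mirrors claim to be done but are still
# degraded.  The sleep-interval choice in isolation could look like:
def _ExampleSyncSleepInterval(estimated_times, cap=60):
  """Derive a sleep interval (seconds) from per-disk estimates (None allowed)."""
  known = [t for t in estimated_times if t is not None]
  if not known:
    return 1
  return min(cap, max(known))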
3980 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3981 """Check that mirrors are not degraded.
3983 The ldisk parameter, if True, will change the test from the
3984 is_degraded attribute (which represents overall non-ok status for
3985 the device(s)) to the ldisk (representing the local storage status).
3988 lu.cfg.SetDiskID(dev, node)
3992 if on_primary or dev.AssembleOnSecondary():
3993 rstats = lu.rpc.call_blockdev_find(node, dev)
3994 msg = rstats.fail_msg
3996 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3998 elif not rstats.payload:
3999 lu.LogWarning("Can't find disk on node %s", node)
4003 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4005 result = result and not rstats.payload.is_degraded
4008 for child in dev.children:
4009 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4014 class LUOobCommand(NoHooksLU):
4015 """Logical unit for OOB handling.
4019 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4021 def ExpandNames(self):
4022 """Gather locks we need.
4025 if self.op.node_names:
4026 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4027 lock_names = self.op.node_names
4029 lock_names = locking.ALL_SET
4031 self.needed_locks = {
4032 locking.LEVEL_NODE: lock_names,
4035 def CheckPrereq(self):
4036 """Check prerequisites.
4039 - the node exists in the configuration
4042 Any errors are signaled by raising errors.OpPrereqError.
4046 self.master_node = self.cfg.GetMasterNode()
4048 assert self.op.power_delay >= 0.0
4050 if self.op.node_names:
4051 if (self.op.command in self._SKIP_MASTER and
4052 self.master_node in self.op.node_names):
4053 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4054 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4056 if master_oob_handler:
4057 additional_text = ("run '%s %s %s' if you want to operate on the"
4058 " master regardless") % (master_oob_handler,
4062 additional_text = "it does not support out-of-band operations"
4064 raise errors.OpPrereqError(("Operating on the master node %s is not"
4065 " allowed for %s; %s") %
4066 (self.master_node, self.op.command,
4067 additional_text), errors.ECODE_INVAL)
4069 self.op.node_names = self.cfg.GetNodeList()
4070 if self.op.command in self._SKIP_MASTER:
4071 self.op.node_names.remove(self.master_node)
4073 if self.op.command in self._SKIP_MASTER:
4074 assert self.master_node not in self.op.node_names
4076 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4078 raise errors.OpPrereqError("Node %s not found" % node_name,
4081 self.nodes.append(node)
4083 if (not self.op.ignore_status and
4084 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4085 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4086 " not marked offline") % node_name,
4089 def Exec(self, feedback_fn):
4090 """Execute OOB and return result if we expect any.
4093 master_node = self.master_node
4096 for idx, node in enumerate(utils.NiceSort(self.nodes,
4097 key=lambda node: node.name)):
4098 node_entry = [(constants.RS_NORMAL, node.name)]
4099 ret.append(node_entry)
4101 oob_program = _SupportsOob(self.cfg, node)
4104 node_entry.append((constants.RS_UNAVAIL, None))
4107 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4108 self.op.command, oob_program, node.name)
4109 result = self.rpc.call_run_oob(master_node, oob_program,
4110 self.op.command, node.name,
4114 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4115 node.name, result.fail_msg)
4116 node_entry.append((constants.RS_NODATA, None))
4119 self._CheckPayload(result)
4120 except errors.OpExecError, err:
4121 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4123 node_entry.append((constants.RS_NODATA, None))
4125 if self.op.command == constants.OOB_HEALTH:
4126 # For health we should log important events
4127 for item, status in result.payload:
4128 if status in [constants.OOB_STATUS_WARNING,
4129 constants.OOB_STATUS_CRITICAL]:
4130 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4131 item, node.name, status)
4133 if self.op.command == constants.OOB_POWER_ON:
4135 elif self.op.command == constants.OOB_POWER_OFF:
4136 node.powered = False
4137 elif self.op.command == constants.OOB_POWER_STATUS:
4138 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4139 if powered != node.powered:
4140 logging.warning(("Recorded power state (%s) of node '%s' does not"
4141 " match actual power state (%s)"), node.powered,
4144 # For configuration changing commands we should update the node
4145 if self.op.command in (constants.OOB_POWER_ON,
4146 constants.OOB_POWER_OFF):
4147 self.cfg.Update(node, feedback_fn)
4149 node_entry.append((constants.RS_NORMAL, result.payload))
4151 if (self.op.command == constants.OOB_POWER_ON and
4152 idx < len(self.nodes) - 1):
4153 time.sleep(self.op.power_delay)
4157 def _CheckPayload(self, result):
4158 """Checks if the payload is valid.
4160 @param result: RPC result
4161 @raises errors.OpExecError: If payload is not valid
4165 if self.op.command == constants.OOB_HEALTH:
4166 if not isinstance(result.payload, list):
4167 errs.append("command 'health' is expected to return a list but got %s" %
4168 type(result.payload))
4170 for item, status in result.payload:
4171 if status not in constants.OOB_STATUSES:
4172 errs.append("health item '%s' has invalid status '%s'" %
4175 if self.op.command == constants.OOB_POWER_STATUS:
4176 if not isinstance(result.payload, dict):
4177 errs.append("power-status is expected to return a dict but got %s" %
4178 type(result.payload))
4180 if self.op.command in [
4181 constants.OOB_POWER_ON,
4182 constants.OOB_POWER_OFF,
4183 constants.OOB_POWER_CYCLE,
4185 if result.payload is not None:
4186 errs.append("%s is expected to not return payload but got '%s'" %
4187 (self.op.command, result.payload))
4190 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4191 utils.CommaJoin(errs))
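# Illustrative note (an assumption, not taken from this file): given the
# checks in _CheckPayload above, a well-formed "power-status" payload is a
# dict along the lines of {constants.OOB_POWER_STATUS_POWERED: True}, a
# "health" payload is a list of (item, status) pairs with statuses drawn from
# constants.OOB_STATUSES, and the power-on/off/cycle commands return no
# payload at all.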
4194 class _OsQuery(_QueryBase):
4195 FIELDS = query.OS_FIELDS
4197 def ExpandNames(self, lu):
4198 # Lock all nodes in shared mode
4199 # Temporary removal of locks, should be reverted later
4200 # TODO: reintroduce locks when they are lighter-weight
4201 lu.needed_locks = {}
4202 #self.share_locks[locking.LEVEL_NODE] = 1
4203 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4205 # The following variables interact with _QueryBase._GetNames
4207 self.wanted = self.names
4209 self.wanted = locking.ALL_SET
4211 self.do_locking = self.use_locking
4213 def DeclareLocks(self, lu, level):
4217 def _DiagnoseByOS(rlist):
4218 """Remaps a per-node return list into an a per-os per-node dictionary
4220 @param rlist: a map with node names as keys and OS objects as values
4223 @return: a dictionary with osnames as keys and as value another
4224 map, with nodes as keys and tuples of (path, status, diagnose,
4225 variants, parameters, api_versions) as values, eg::
4227 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4228 (/srv/..., False, "invalid api")],
4229 "node2": [(/srv/..., True, "", [], [])]}
4234 # we build here the list of nodes that didn't fail the RPC (at RPC
4235 # level), so that nodes with a non-responding node daemon don't
4236 # make all OSes invalid
4237 good_nodes = [node_name for node_name in rlist
4238 if not rlist[node_name].fail_msg]
4239 for node_name, nr in rlist.items():
4240 if nr.fail_msg or not nr.payload:
4242 for (name, path, status, diagnose, variants,
4243 params, api_versions) in nr.payload:
4244 if name not in all_os:
4245 # build a list of nodes for this os containing empty lists
4246 # for each node in node_list
4248 for nname in good_nodes:
4249 all_os[name][nname] = []
4250 # convert params from [name, help] to (name, help)
4251 params = [tuple(v) for v in params]
4252 all_os[name][node_name].append((path, status, diagnose,
4253 variants, params, api_versions))
4256 def _GetQueryData(self, lu):
4257 """Computes the list of nodes and their attributes.
4260 # Locking is not used
4261 assert not (compat.any(lu.glm.is_owned(level)
4262 for level in locking.LEVELS
4263 if level != locking.LEVEL_CLUSTER) or
4264 self.do_locking or self.use_locking)
4266 valid_nodes = [node.name
4267 for node in lu.cfg.GetAllNodesInfo().values()
4268 if not node.offline and node.vm_capable]
4269 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4270 cluster = lu.cfg.GetClusterInfo()
4274 for (os_name, os_data) in pol.items():
4275 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4276 hidden=(os_name in cluster.hidden_os),
4277 blacklisted=(os_name in cluster.blacklisted_os))
4281 api_versions = set()
4283 for idx, osl in enumerate(os_data.values()):
4284 info.valid = bool(info.valid and osl and osl[0][1])
4288 (node_variants, node_params, node_api) = osl[0][3:6]
4291 variants.update(node_variants)
4292 parameters.update(node_params)
4293 api_versions.update(node_api)
4295 # Filter out inconsistent values
4296 variants.intersection_update(node_variants)
4297 parameters.intersection_update(node_params)
4298 api_versions.intersection_update(node_api)
4300 info.variants = list(variants)
4301 info.parameters = list(parameters)
4302 info.api_versions = list(api_versions)
4304 data[os_name] = info
4306 # Prepare data in requested order
4307 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4311 class LUOsDiagnose(NoHooksLU):
4312 """Logical unit for OS diagnose/query.
4318 def _BuildFilter(fields, names):
4319 """Builds a filter for querying OSes.
4322 name_filter = qlang.MakeSimpleFilter("name", names)
4324 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4325 # respective field is not requested
4326 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4327 for fname in ["hidden", "blacklisted"]
4328 if fname not in fields]
4329 if "valid" not in fields:
4330 status_filter.append([qlang.OP_TRUE, "valid"])
4333 status_filter.insert(0, qlang.OP_AND)
4335 status_filter = None
4337 if name_filter and status_filter:
4338 return [qlang.OP_AND, name_filter, status_filter]
4342 return status_filter
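# Worked example (derived from the code above): for a default query that asks
# only for the "name" field and gives no OS names (so there is no name
# filter), the resulting filter is
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# i.e. hidden, blacklisted and invalid OSes are filtered out unless the
# corresponding field was explicitly requested.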
4344 def CheckArguments(self):
4345 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4346 self.op.output_fields, False)
4348 def ExpandNames(self):
4349 self.oq.ExpandNames(self)
4351 def Exec(self, feedback_fn):
4352 return self.oq.OldStyleQuery(self)
4355 class LUNodeRemove(LogicalUnit):
4356 """Logical unit for removing a node.
4359 HPATH = "node-remove"
4360 HTYPE = constants.HTYPE_NODE
4362 def BuildHooksEnv(self):
4365 This doesn't run on the target node in the pre phase as a failed
4366 node would then be impossible to remove.
4370 "OP_TARGET": self.op.node_name,
4371 "NODE_NAME": self.op.node_name,
4374 def BuildHooksNodes(self):
4375 """Build hooks nodes.
4378 all_nodes = self.cfg.GetNodeList()
4380 all_nodes.remove(self.op.node_name)
4382 logging.warning("Node '%s', which is about to be removed, was not found"
4383 " in the list of all nodes", self.op.node_name)
4384 return (all_nodes, all_nodes)
4386 def CheckPrereq(self):
4387 """Check prerequisites.
4390 - the node exists in the configuration
4391 - it does not have primary or secondary instances
4392 - it's not the master
4394 Any errors are signaled by raising errors.OpPrereqError.
4397 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4398 node = self.cfg.GetNodeInfo(self.op.node_name)
4399 assert node is not None
4401 masternode = self.cfg.GetMasterNode()
4402 if node.name == masternode:
4403 raise errors.OpPrereqError("Node is the master node, failover to another"
4404 " node is required", errors.ECODE_INVAL)
4406 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4407 if node.name in instance.all_nodes:
4408 raise errors.OpPrereqError("Instance %s is still running on the node,"
4409 " please remove first" % instance_name,
4411 self.op.node_name = node.name
4414 def Exec(self, feedback_fn):
4415 """Removes the node from the cluster.
4419 logging.info("Stopping the node daemon and removing configs from node %s",
4422 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4424 # Promote nodes to master candidate as needed
4425 _AdjustCandidatePool(self, exceptions=[node.name])
4426 self.context.RemoveNode(node.name)
4428 # Run post hooks on the node before it's removed
4429 _RunPostHook(self, node.name)
4431 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4432 msg = result.fail_msg
4434 self.LogWarning("Errors encountered on the remote node while leaving"
4435 " the cluster: %s", msg)
4437 # Remove node from our /etc/hosts
4438 if self.cfg.GetClusterInfo().modify_etc_hosts:
4439 master_node = self.cfg.GetMasterNode()
4440 result = self.rpc.call_etc_hosts_modify(master_node,
4441 constants.ETC_HOSTS_REMOVE,
4443 result.Raise("Can't update hosts file with new host data")
4444 _RedistributeAncillaryFiles(self)
4447 class _NodeQuery(_QueryBase):
4448 FIELDS = query.NODE_FIELDS
4450 def ExpandNames(self, lu):
4451 lu.needed_locks = {}
4452 lu.share_locks = _ShareAll()
4455 self.wanted = _GetWantedNodes(lu, self.names)
4457 self.wanted = locking.ALL_SET
4459 self.do_locking = (self.use_locking and
4460 query.NQ_LIVE in self.requested_data)
4463 # If any non-static field is requested we need to lock the nodes
4464 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4466 def DeclareLocks(self, lu, level):
4469 def _GetQueryData(self, lu):
4470 """Computes the list of nodes and their attributes.
4473 all_info = lu.cfg.GetAllNodesInfo()
4475 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4477 # Gather data as requested
4478 if query.NQ_LIVE in self.requested_data:
4479 # filter out non-vm_capable nodes
4480 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4482 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4483 lu.cfg.GetHypervisorType())
4484 live_data = dict((name, nresult.payload)
4485 for (name, nresult) in node_data.items()
4486 if not nresult.fail_msg and nresult.payload)
4490 if query.NQ_INST in self.requested_data:
4491 node_to_primary = dict([(name, set()) for name in nodenames])
4492 node_to_secondary = dict([(name, set()) for name in nodenames])
4494 inst_data = lu.cfg.GetAllInstancesInfo()
4496 for inst in inst_data.values():
4497 if inst.primary_node in node_to_primary:
4498 node_to_primary[inst.primary_node].add(inst.name)
4499 for secnode in inst.secondary_nodes:
4500 if secnode in node_to_secondary:
4501 node_to_secondary[secnode].add(inst.name)
4503 node_to_primary = None
4504 node_to_secondary = None
4506 if query.NQ_OOB in self.requested_data:
4507 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4508 for name, node in all_info.iteritems())
4512 if query.NQ_GROUP in self.requested_data:
4513 groups = lu.cfg.GetAllNodeGroupsInfo()
4517 return query.NodeQueryData([all_info[name] for name in nodenames],
4518 live_data, lu.cfg.GetMasterNode(),
4519 node_to_primary, node_to_secondary, groups,
4520 oob_support, lu.cfg.GetClusterInfo())
4523 class LUNodeQuery(NoHooksLU):
4524 """Logical unit for querying nodes.
4527 # pylint: disable=W0142
4530 def CheckArguments(self):
4531 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4532 self.op.output_fields, self.op.use_locking)
4534 def ExpandNames(self):
4535 self.nq.ExpandNames(self)
4537 def Exec(self, feedback_fn):
4538 return self.nq.OldStyleQuery(self)
4541 class LUNodeQueryvols(NoHooksLU):
4542 """Logical unit for getting volumes on node(s).
4546 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4547 _FIELDS_STATIC = utils.FieldSet("node")
4549 def CheckArguments(self):
4550 _CheckOutputFields(static=self._FIELDS_STATIC,
4551 dynamic=self._FIELDS_DYNAMIC,
4552 selected=self.op.output_fields)
4554 def ExpandNames(self):
4555 self.needed_locks = {}
4556 self.share_locks[locking.LEVEL_NODE] = 1
4557 if not self.op.nodes:
4558 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4560 self.needed_locks[locking.LEVEL_NODE] = \
4561 _GetWantedNodes(self, self.op.nodes)
4563 def Exec(self, feedback_fn):
4564 """Computes the list of nodes and their attributes.
4567 nodenames = self.owned_locks(locking.LEVEL_NODE)
4568 volumes = self.rpc.call_node_volumes(nodenames)
4570 ilist = self.cfg.GetAllInstancesInfo()
4571 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4574 for node in nodenames:
4575 nresult = volumes[node]
4578 msg = nresult.fail_msg
4580 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4583 node_vols = sorted(nresult.payload,
4584 key=operator.itemgetter("dev"))
4586 for vol in node_vols:
4588 for field in self.op.output_fields:
4591 elif field == "phys":
4595 elif field == "name":
4597 elif field == "size":
4598 val = int(float(vol["size"]))
4599 elif field == "instance":
4600 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4602 raise errors.ParameterError(field)
4603 node_output.append(str(val))
4605 output.append(node_output)
4610 class LUNodeQueryStorage(NoHooksLU):
4611 """Logical unit for getting information on storage units on node(s).
4614 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4617 def CheckArguments(self):
4618 _CheckOutputFields(static=self._FIELDS_STATIC,
4619 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4620 selected=self.op.output_fields)
4622 def ExpandNames(self):
4623 self.needed_locks = {}
4624 self.share_locks[locking.LEVEL_NODE] = 1
4627 self.needed_locks[locking.LEVEL_NODE] = \
4628 _GetWantedNodes(self, self.op.nodes)
4630 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4632 def Exec(self, feedback_fn):
4633 """Computes the list of nodes and their attributes.
4636 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4638 # Always get name to sort by
4639 if constants.SF_NAME in self.op.output_fields:
4640 fields = self.op.output_fields[:]
4642 fields = [constants.SF_NAME] + self.op.output_fields
4644 # Never ask for node or type as it's only known to the LU
4645 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4646 while extra in fields:
4647 fields.remove(extra)
4649 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4650 name_idx = field_idx[constants.SF_NAME]
4652 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4653 data = self.rpc.call_storage_list(self.nodes,
4654 self.op.storage_type, st_args,
4655 self.op.name, fields)
4659 for node in utils.NiceSort(self.nodes):
4660 nresult = data[node]
4664 msg = nresult.fail_msg
4666 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4669 rows = dict([(row[name_idx], row) for row in nresult.payload])
4671 for name in utils.NiceSort(rows.keys()):
4676 for field in self.op.output_fields:
4677 if field == constants.SF_NODE:
4679 elif field == constants.SF_TYPE:
4680 val = self.op.storage_type
4681 elif field in field_idx:
4682 val = row[field_idx[field]]
4684 raise errors.ParameterError(field)
4693 class _InstanceQuery(_QueryBase):
4694 FIELDS = query.INSTANCE_FIELDS
4696 def ExpandNames(self, lu):
4697 lu.needed_locks = {}
4698 lu.share_locks = _ShareAll()
4701 self.wanted = _GetWantedInstances(lu, self.names)
4703 self.wanted = locking.ALL_SET
4705 self.do_locking = (self.use_locking and
4706 query.IQ_LIVE in self.requested_data)
4708 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4709 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4710 lu.needed_locks[locking.LEVEL_NODE] = []
4711 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4713 self.do_grouplocks = (self.do_locking and
4714 query.IQ_NODES in self.requested_data)
4716 def DeclareLocks(self, lu, level):
4718 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4719 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4721 # Lock all groups used by instances optimistically; this requires going
4722 # via the node before it's locked, requiring verification later on
4723 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4725 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4726 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4727 elif level == locking.LEVEL_NODE:
4728 lu._LockInstancesNodes() # pylint: disable=W0212
4731 def _CheckGroupLocks(lu):
4732 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4733 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4735 # Check if node groups for locked instances are still correct
4736 for instance_name in owned_instances:
4737 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4739 def _GetQueryData(self, lu):
4740 """Computes the list of instances and their attributes.
4743 if self.do_grouplocks:
4744 self._CheckGroupLocks(lu)
4746 cluster = lu.cfg.GetClusterInfo()
4747 all_info = lu.cfg.GetAllInstancesInfo()
4749 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4751 instance_list = [all_info[name] for name in instance_names]
4752 nodes = frozenset(itertools.chain(*(inst.all_nodes
4753 for inst in instance_list)))
4754 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4757 wrongnode_inst = set()
4759 # Gather data as requested
4760 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4762 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4764 result = node_data[name]
4766 # offline nodes will be in both lists
4767 assert result.fail_msg
4768 offline_nodes.append(name)
4770 bad_nodes.append(name)
4771 elif result.payload:
4772 for inst in result.payload:
4773 if inst in all_info:
4774 if all_info[inst].primary_node == name:
4775 live_data.update(result.payload)
4777 wrongnode_inst.add(inst)
4779 # orphan instance; we don't list it here as we don't
4780 # handle this case yet in the output of instance listing
4781 logging.warning("Orphan instance '%s' found on node %s",
4783 # else no instance is alive
4787 if query.IQ_DISKUSAGE in self.requested_data:
4788 disk_usage = dict((inst.name,
4789 _ComputeDiskSize(inst.disk_template,
4790 [{constants.IDISK_SIZE: disk.size}
4791 for disk in inst.disks]))
4792 for inst in instance_list)
4796 if query.IQ_CONSOLE in self.requested_data:
4798 for inst in instance_list:
4799 if inst.name in live_data:
4800 # Instance is running
4801 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4803 consinfo[inst.name] = None
4804 assert set(consinfo.keys()) == set(instance_names)
4808 if query.IQ_NODES in self.requested_data:
4809 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4811 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4812 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4813 for uuid in set(map(operator.attrgetter("group"),
4819 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4820 disk_usage, offline_nodes, bad_nodes,
4821 live_data, wrongnode_inst, consinfo,
4825 class LUQuery(NoHooksLU):
4826 """Query for resources/items of a certain kind.
4829 # pylint: disable=W0142
4832 def CheckArguments(self):
4833 qcls = _GetQueryImplementation(self.op.what)
4835 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4837 def ExpandNames(self):
4838 self.impl.ExpandNames(self)
4840 def DeclareLocks(self, level):
4841 self.impl.DeclareLocks(self, level)
4843 def Exec(self, feedback_fn):
4844 return self.impl.NewStyleQuery(self)
4847 class LUQueryFields(NoHooksLU):
4848 """Query for resources/items of a certain kind.
4851 # pylint: disable=W0142
4854 def CheckArguments(self):
4855 self.qcls = _GetQueryImplementation(self.op.what)
4857 def ExpandNames(self):
4858 self.needed_locks = {}
4860 def Exec(self, feedback_fn):
4861 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4864 class LUNodeModifyStorage(NoHooksLU):
4865 """Logical unit for modifying a storage volume on a node.
4870 def CheckArguments(self):
4871 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4873 storage_type = self.op.storage_type
4876 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4878 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4879 " modified" % storage_type,
4882 diff = set(self.op.changes.keys()) - modifiable
4884 raise errors.OpPrereqError("The following fields can not be modified for"
4885 " storage units of type '%s': %r" %
4886 (storage_type, list(diff)),
4889 def ExpandNames(self):
4890 self.needed_locks = {
4891 locking.LEVEL_NODE: self.op.node_name,
4894 def Exec(self, feedback_fn):
4895 """Computes the list of nodes and their attributes.
4898 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4899 result = self.rpc.call_storage_modify(self.op.node_name,
4900 self.op.storage_type, st_args,
4901 self.op.name, self.op.changes)
4902 result.Raise("Failed to modify storage unit '%s' on %s" %
4903 (self.op.name, self.op.node_name))
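# Illustrative note (an assumption, not taken from this file): for an LVM
# physical volume the modifiable fields would typically be limited to its
# allocatable flag, so self.op.changes might look like
# {constants.SF_ALLOCATABLE: False}; any other key is rejected by the
# MODIFIABLE_STORAGE_FIELDS check in CheckArguments above.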
4906 class LUNodeAdd(LogicalUnit):
4907 """Logical unit for adding node to the cluster.
4911 HTYPE = constants.HTYPE_NODE
4912 _NFLAGS = ["master_capable", "vm_capable"]
4914 def CheckArguments(self):
4915 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4916 # validate/normalize the node name
4917 self.hostname = netutils.GetHostname(name=self.op.node_name,
4918 family=self.primary_ip_family)
4919 self.op.node_name = self.hostname.name
4921 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4922 raise errors.OpPrereqError("Cannot readd the master node",
4925 if self.op.readd and self.op.group:
4926 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4927 " being readded", errors.ECODE_INVAL)
4929 def BuildHooksEnv(self):
4932 This will run on all nodes before, and on all nodes + the new node after.
4936 "OP_TARGET": self.op.node_name,
4937 "NODE_NAME": self.op.node_name,
4938 "NODE_PIP": self.op.primary_ip,
4939 "NODE_SIP": self.op.secondary_ip,
4940 "MASTER_CAPABLE": str(self.op.master_capable),
4941 "VM_CAPABLE": str(self.op.vm_capable),
4944 def BuildHooksNodes(self):
4945 """Build hooks nodes.
4948 # Exclude added node
4949 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4950 post_nodes = pre_nodes + [self.op.node_name, ]
4952 return (pre_nodes, post_nodes)
4954 def CheckPrereq(self):
4955 """Check prerequisites.
4958 - the new node is not already in the config
4960 - its parameters (single/dual homed) matches the cluster
4962 Any errors are signaled by raising errors.OpPrereqError.
4966 hostname = self.hostname
4967 node = hostname.name
4968 primary_ip = self.op.primary_ip = hostname.ip
4969 if self.op.secondary_ip is None:
4970 if self.primary_ip_family == netutils.IP6Address.family:
4971 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4972 " IPv4 address must be given as secondary",
4974 self.op.secondary_ip = primary_ip
4976 secondary_ip = self.op.secondary_ip
4977 if not netutils.IP4Address.IsValid(secondary_ip):
4978 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4979 " address" % secondary_ip, errors.ECODE_INVAL)
4981 node_list = cfg.GetNodeList()
4982 if not self.op.readd and node in node_list:
4983 raise errors.OpPrereqError("Node %s is already in the configuration" %
4984 node, errors.ECODE_EXISTS)
4985 elif self.op.readd and node not in node_list:
4986 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4989 self.changed_primary_ip = False
4991 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4992 if self.op.readd and node == existing_node_name:
4993 if existing_node.secondary_ip != secondary_ip:
4994 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4995 " address configuration as before",
4997 if existing_node.primary_ip != primary_ip:
4998 self.changed_primary_ip = True
5002 if (existing_node.primary_ip == primary_ip or
5003 existing_node.secondary_ip == primary_ip or
5004 existing_node.primary_ip == secondary_ip or
5005 existing_node.secondary_ip == secondary_ip):
5006 raise errors.OpPrereqError("New node ip address(es) conflict with"
5007 " existing node %s" % existing_node.name,
5008 errors.ECODE_NOTUNIQUE)
5010 # After this 'if' block, None is no longer a valid value for the
5011 # _capable op attributes
5013 old_node = self.cfg.GetNodeInfo(node)
5014 assert old_node is not None, "Can't retrieve locked node %s" % node
5015 for attr in self._NFLAGS:
5016 if getattr(self.op, attr) is None:
5017 setattr(self.op, attr, getattr(old_node, attr))
5019 for attr in self._NFLAGS:
5020 if getattr(self.op, attr) is None:
5021 setattr(self.op, attr, True)
5023 if self.op.readd and not self.op.vm_capable:
5024 pri, sec = cfg.GetNodeInstances(node)
5026 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5027 " flag set to false, but it already holds"
5028 " instances" % node,
5031 # check that the type of the node (single versus dual homed) is the
5032 # same as for the master
5033 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5034 master_singlehomed = myself.secondary_ip == myself.primary_ip
5035 newbie_singlehomed = secondary_ip == primary_ip
5036 if master_singlehomed != newbie_singlehomed:
5037 if master_singlehomed:
5038 raise errors.OpPrereqError("The master has no secondary ip but the"
5039 " new node has one",
5042 raise errors.OpPrereqError("The master has a secondary ip but the"
5043 " new node doesn't have one",
5046 # checks reachability
5047 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5048 raise errors.OpPrereqError("Node not reachable by ping",
5049 errors.ECODE_ENVIRON)
5051 if not newbie_singlehomed:
5052 # check reachability from my secondary ip to newbie's secondary ip
5053 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5054 source=myself.secondary_ip):
5055 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5056 " based ping to node daemon port",
5057 errors.ECODE_ENVIRON)
5064 if self.op.master_capable:
5065 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5067 self.master_candidate = False
5070 self.new_node = old_node
5072 node_group = cfg.LookupNodeGroup(self.op.group)
5073 self.new_node = objects.Node(name=node,
5074 primary_ip=primary_ip,
5075 secondary_ip=secondary_ip,
5076 master_candidate=self.master_candidate,
5077 offline=False, drained=False,
5080 if self.op.ndparams:
5081 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5083 def Exec(self, feedback_fn):
5084 """Adds the new node to the cluster.
5087 new_node = self.new_node
5088 node = new_node.name
5090 # We are adding a new node, so we assume it's powered
5091 new_node.powered = True
5093 # for re-adds, reset the offline/drained/master-candidate flags;
5094 # we need to reset here, otherwise offline would prevent RPC calls
5095 # later in the procedure; this also means that if the re-add
5096 # fails, we are left with a non-offlined, broken node
5098 new_node.drained = new_node.offline = False # pylint: disable=W0201
5099 self.LogInfo("Readding a node, the offline/drained flags were reset")
5100 # if we demote the node, we do cleanup later in the procedure
5101 new_node.master_candidate = self.master_candidate
5102 if self.changed_primary_ip:
5103 new_node.primary_ip = self.op.primary_ip
5105 # copy the master/vm_capable flags
5106 for attr in self._NFLAGS:
5107 setattr(new_node, attr, getattr(self.op, attr))
5109 # notify the user about any possible mc promotion
5110 if new_node.master_candidate:
5111 self.LogInfo("Node will be a master candidate")
5113 if self.op.ndparams:
5114 new_node.ndparams = self.op.ndparams
5116 new_node.ndparams = {}
5118 # check connectivity
5119 result = self.rpc.call_version([node])[node]
5120 result.Raise("Can't get version information from node %s" % node)
5121 if constants.PROTOCOL_VERSION == result.payload:
5122 logging.info("Communication to node %s fine, sw version %s match",
5123 node, result.payload)
5125 raise errors.OpExecError("Version mismatch master version %s,"
5126 " node version %s" %
5127 (constants.PROTOCOL_VERSION, result.payload))
5129 # Add node to our /etc/hosts, and add key to known_hosts
5130 if self.cfg.GetClusterInfo().modify_etc_hosts:
5131 master_node = self.cfg.GetMasterNode()
5132 result = self.rpc.call_etc_hosts_modify(master_node,
5133 constants.ETC_HOSTS_ADD,
5136 result.Raise("Can't update hosts file with new host data")
5138 if new_node.secondary_ip != new_node.primary_ip:
5139 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5142 node_verify_list = [self.cfg.GetMasterNode()]
5143 node_verify_param = {
5144 constants.NV_NODELIST: ([node], {}),
5145 # TODO: do a node-net-test as well?
5148 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5149 self.cfg.GetClusterName())
5150 for verifier in node_verify_list:
5151 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5152 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5154 for failed in nl_payload:
5155 feedback_fn("ssh/hostname verification failed"
5156 " (checking from %s): %s" %
5157 (verifier, nl_payload[failed]))
5158 raise errors.OpExecError("ssh/hostname verification failed")
5161 _RedistributeAncillaryFiles(self)
5162 self.context.ReaddNode(new_node)
5163 # make sure we redistribute the config
5164 self.cfg.Update(new_node, feedback_fn)
5165 # and make sure the new node will not have old files around
5166 if not new_node.master_candidate:
5167 result = self.rpc.call_node_demote_from_mc(new_node.name)
5168 msg = result.fail_msg
5170 self.LogWarning("Node failed to demote itself from master"
5171 " candidate status: %s" % msg)
5173 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5174 additional_vm=self.op.vm_capable)
5175 self.context.AddNode(new_node, self.proc.GetECId())
5178 class LUNodeSetParams(LogicalUnit):
5179 """Modifies the parameters of a node.
5181 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5182 to the node role (as _ROLE_*)
5183 @cvar _R2F: a dictionary from node role to tuples of flags
5184 @cvar _FLAGS: a list of attribute names corresponding to the flags
5187 HPATH = "node-modify"
5188 HTYPE = constants.HTYPE_NODE
5190 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5192 (True, False, False): _ROLE_CANDIDATE,
5193 (False, True, False): _ROLE_DRAINED,
5194 (False, False, True): _ROLE_OFFLINE,
5195 (False, False, False): _ROLE_REGULAR,
5197 _R2F = dict((v, k) for k, v in _F2R.items())
5198 _FLAGS = ["master_candidate", "drained", "offline"]
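# Worked example (derived from the tables above): a master candidate node
# corresponds to the flag tuple (master_candidate=True, drained=False,
# offline=False), so _F2R[(True, False, False)] is _ROLE_CANDIDATE and,
# conversely, _R2F[_ROLE_OFFLINE] gives back (False, False, True).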
5200 def CheckArguments(self):
5201 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5202 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5203 self.op.master_capable, self.op.vm_capable,
5204 self.op.secondary_ip, self.op.ndparams]
5205 if all_mods.count(None) == len(all_mods):
5206 raise errors.OpPrereqError("Please pass at least one modification",
5208 if all_mods.count(True) > 1:
5209 raise errors.OpPrereqError("Can't set the node into more than one"
5210 " state at the same time",
5213 # Boolean value that tells us whether we might be demoting from MC
5214 self.might_demote = (self.op.master_candidate == False or
5215 self.op.offline == True or
5216 self.op.drained == True or
5217 self.op.master_capable == False)
5219 if self.op.secondary_ip:
5220 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5221 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5222 " address" % self.op.secondary_ip,
5225 self.lock_all = self.op.auto_promote and self.might_demote
5226 self.lock_instances = self.op.secondary_ip is not None
5228 def ExpandNames(self):
5230 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5232 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5234 if self.lock_instances:
5235 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5237 def DeclareLocks(self, level):
5238 # If we have locked all instances, before waiting to lock nodes, release
5239 # all the ones living on nodes unrelated to the current operation.
5240 if level == locking.LEVEL_NODE and self.lock_instances:
5241 self.affected_instances = []
5242 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5245 # Build list of instances to release
5246 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5247 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5248 if (instance.disk_template in constants.DTS_INT_MIRROR and
5249 self.op.node_name in instance.all_nodes):
5250 instances_keep.append(instance_name)
5251 self.affected_instances.append(instance)
5253 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5255 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5256 set(instances_keep))
5258 def BuildHooksEnv(self):
5261 This runs on the master node.
5265 "OP_TARGET": self.op.node_name,
5266 "MASTER_CANDIDATE": str(self.op.master_candidate),
5267 "OFFLINE": str(self.op.offline),
5268 "DRAINED": str(self.op.drained),
5269 "MASTER_CAPABLE": str(self.op.master_capable),
5270 "VM_CAPABLE": str(self.op.vm_capable),
5273 def BuildHooksNodes(self):
5274 """Build hooks nodes.
5277 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5280 def CheckPrereq(self):
5281 """Check prerequisites.
5283 This only checks the instance list against the existing names.
5286 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5288 if (self.op.master_candidate is not None or
5289 self.op.drained is not None or
5290 self.op.offline is not None):
5291 # we can't change the master's node flags
5292 if self.op.node_name == self.cfg.GetMasterNode():
5293 raise errors.OpPrereqError("The master role can be changed"
5294 " only via master-failover",
5297 if self.op.master_candidate and not node.master_capable:
5298 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5299 " it a master candidate" % node.name,
5302 if self.op.vm_capable == False:
5303 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5305 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5306 " the vm_capable flag" % node.name,
5309 if node.master_candidate and self.might_demote and not self.lock_all:
5310 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5311 # check if after removing the current node, we're missing master
5313 (mc_remaining, mc_should, _) = \
5314 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5315 if mc_remaining < mc_should:
5316 raise errors.OpPrereqError("Not enough master candidates, please"
5317 " pass auto promote option to allow"
5318 " promotion", errors.ECODE_STATE)
5320 self.old_flags = old_flags = (node.master_candidate,
5321 node.drained, node.offline)
5322 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5323 self.old_role = old_role = self._F2R[old_flags]
5325 # Check for ineffective changes
5326 for attr in self._FLAGS:
5327 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5328 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5329 setattr(self.op, attr, None)
5331 # Past this point, any flag change to False means a transition
5332 # away from the respective state, as only real changes are kept
5334 # TODO: We might query the real power state if it supports OOB
5335 if _SupportsOob(self.cfg, node):
5336 if self.op.offline is False and not (node.powered or
5337 self.op.powered == True):
5338 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5339 " offline status can be reset") %
5341 elif self.op.powered is not None:
5342 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5343 " as it does not support out-of-band"
5344 " handling") % self.op.node_name)
5346 # If we're being deofflined/drained, we'll MC ourself if needed
5347 if (self.op.drained == False or self.op.offline == False or
5348 (self.op.master_capable and not node.master_capable)):
5349 if _DecideSelfPromotion(self):
5350 self.op.master_candidate = True
5351 self.LogInfo("Auto-promoting node to master candidate")
5353 # If we're no longer master capable, we'll demote ourselves from MC
5354 if self.op.master_capable == False and node.master_candidate:
5355 self.LogInfo("Demoting from master candidate")
5356 self.op.master_candidate = False
5359 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5360 if self.op.master_candidate:
5361 new_role = self._ROLE_CANDIDATE
5362 elif self.op.drained:
5363 new_role = self._ROLE_DRAINED
5364 elif self.op.offline:
5365 new_role = self._ROLE_OFFLINE
5366 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5367 # False is still in new flags, which means we're un-setting (the
5369 new_role = self._ROLE_REGULAR
5370 else: # no new flags, nothing, keep old role
5373 self.new_role = new_role
5375 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5376 # Trying to transition out of offline status
5377 result = self.rpc.call_version([node.name])[node.name]
5379 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5380 " to report its version: %s" %
5381 (node.name, result.fail_msg),
5384 self.LogWarning("Transitioning node from offline to online state"
5385 " without using re-add. Please make sure the node"
5388 if self.op.secondary_ip:
5389 # Ok even without locking, because this can't be changed by any LU
5390 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5391 master_singlehomed = master.secondary_ip == master.primary_ip
5392 if master_singlehomed and self.op.secondary_ip:
5393 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5394 " homed cluster", errors.ECODE_INVAL)
5397 if self.affected_instances:
5398 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5399 " node has instances (%s) configured"
5400 " to use it" % self.affected_instances)
5402 # On online nodes, check that no instances are running, and that
5403 # the node has the new ip and we can reach it.
5404 for instance in self.affected_instances:
5405 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5407 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5408 if master.name != node.name:
5409 # check reachability from master secondary ip to new secondary ip
5410 if not netutils.TcpPing(self.op.secondary_ip,
5411 constants.DEFAULT_NODED_PORT,
5412 source=master.secondary_ip):
5413 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5414 " based ping to node daemon port",
5415 errors.ECODE_ENVIRON)
5417 if self.op.ndparams:
5418 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5419 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5420 self.new_ndparams = new_ndparams
5422 def Exec(self, feedback_fn):
5427 old_role = self.old_role
5428 new_role = self.new_role
5432 if self.op.ndparams:
5433 node.ndparams = self.new_ndparams
5435 if self.op.powered is not None:
5436 node.powered = self.op.powered
5438 for attr in ["master_capable", "vm_capable"]:
5439 val = getattr(self.op, attr)
5441 setattr(node, attr, val)
5442 result.append((attr, str(val)))
5444 if new_role != old_role:
5445 # Tell the node to demote itself, if no longer MC and not offline
5446 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5447 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5449 self.LogWarning("Node failed to demote itself: %s", msg)
5451 new_flags = self._R2F[new_role]
5452 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5454 result.append((desc, str(nf)))
5455 (node.master_candidate, node.drained, node.offline) = new_flags
5457 # we locked all nodes, we adjust the CP before updating this node
5459 _AdjustCandidatePool(self, [node.name])
5461 if self.op.secondary_ip:
5462 node.secondary_ip = self.op.secondary_ip
5463 result.append(("secondary_ip", self.op.secondary_ip))
5465 # this will trigger configuration file update, if needed
5466 self.cfg.Update(node, feedback_fn)
5468 # this will trigger job queue propagation or cleanup if the mc
5470 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5471 self.context.ReaddNode(node)
5476 class LUNodePowercycle(NoHooksLU):
5477 """Powercycles a node.
5482 def CheckArguments(self):
5483 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5484 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5485 raise errors.OpPrereqError("The node is the master and the force"
5486 " parameter was not set",
5489 def ExpandNames(self):
5490 """Locking for PowercycleNode.
5492 This is a last-resort option and shouldn't block on other
5493 jobs. Therefore, we grab no locks.
5496 self.needed_locks = {}
5498 def Exec(self, feedback_fn):
5502 result = self.rpc.call_node_powercycle(self.op.node_name,
5503 self.cfg.GetHypervisorType())
5504 result.Raise("Failed to schedule the reboot")
5505 return result.payload
5508 class LUClusterQuery(NoHooksLU):
5509 """Query cluster configuration.
5514 def ExpandNames(self):
5515 self.needed_locks = {}
5517 def Exec(self, feedback_fn):
5518 """Return cluster config.
5521 cluster = self.cfg.GetClusterInfo()
5524 # Filter just for enabled hypervisors
5525 for os_name, hv_dict in cluster.os_hvp.items():
5526 os_hvp[os_name] = {}
5527 for hv_name, hv_params in hv_dict.items():
5528 if hv_name in cluster.enabled_hypervisors:
5529 os_hvp[os_name][hv_name] = hv_params
5531 # Convert ip_family to ip_version
5532 primary_ip_version = constants.IP4_VERSION
5533 if cluster.primary_ip_family == netutils.IP6Address.family:
5534 primary_ip_version = constants.IP6_VERSION
5537 "software_version": constants.RELEASE_VERSION,
5538 "protocol_version": constants.PROTOCOL_VERSION,
5539 "config_version": constants.CONFIG_VERSION,
5540 "os_api_version": max(constants.OS_API_VERSIONS),
5541 "export_version": constants.EXPORT_VERSION,
5542 "architecture": (platform.architecture()[0], platform.machine()),
5543 "name": cluster.cluster_name,
5544 "master": cluster.master_node,
5545 "default_hypervisor": cluster.enabled_hypervisors[0],
5546 "enabled_hypervisors": cluster.enabled_hypervisors,
5547 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5548 for hypervisor_name in cluster.enabled_hypervisors]),
5550 "beparams": cluster.beparams,
5551 "osparams": cluster.osparams,
5552 "nicparams": cluster.nicparams,
5553 "ndparams": cluster.ndparams,
5554 "candidate_pool_size": cluster.candidate_pool_size,
5555 "master_netdev": cluster.master_netdev,
5556 "master_netmask": cluster.master_netmask,
5557 "volume_group_name": cluster.volume_group_name,
5558 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5559 "file_storage_dir": cluster.file_storage_dir,
5560 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5561 "maintain_node_health": cluster.maintain_node_health,
5562 "ctime": cluster.ctime,
5563 "mtime": cluster.mtime,
5564 "uuid": cluster.uuid,
5565 "tags": list(cluster.GetTags()),
5566 "uid_pool": cluster.uid_pool,
5567 "default_iallocator": cluster.default_iallocator,
5568 "reserved_lvs": cluster.reserved_lvs,
5569 "primary_ip_version": primary_ip_version,
5570 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5571 "hidden_os": cluster.hidden_os,
5572 "blacklisted_os": cluster.blacklisted_os,
5578 class LUClusterConfigQuery(NoHooksLU):
5579 """Return configuration values.
5583 _FIELDS_DYNAMIC = utils.FieldSet()
5584 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5585 "watcher_pause", "volume_group_name")
5587 def CheckArguments(self):
5588 _CheckOutputFields(static=self._FIELDS_STATIC,
5589 dynamic=self._FIELDS_DYNAMIC,
5590 selected=self.op.output_fields)
5592 def ExpandNames(self):
5593 self.needed_locks = {}
5595 def Exec(self, feedback_fn):
5596 """Dump a representation of the cluster config to the standard output.
5600 for field in self.op.output_fields:
5601 if field == "cluster_name":
5602 entry = self.cfg.GetClusterName()
5603 elif field == "master_node":
5604 entry = self.cfg.GetMasterNode()
5605 elif field == "drain_flag":
5606 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5607 elif field == "watcher_pause":
5608 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5609 elif field == "volume_group_name":
5610 entry = self.cfg.GetVGName()
5612 raise errors.ParameterError(field)
5613 values.append(entry)
5617 class LUInstanceActivateDisks(NoHooksLU):
5618 """Bring up an instance's disks.
5623 def ExpandNames(self):
5624 self._ExpandAndLockInstance()
5625 self.needed_locks[locking.LEVEL_NODE] = []
5626 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5628 def DeclareLocks(self, level):
5629 if level == locking.LEVEL_NODE:
5630 self._LockInstancesNodes()
5632 def CheckPrereq(self):
5633 """Check prerequisites.
5635 This checks that the instance is in the cluster.
5638 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5639 assert self.instance is not None, \
5640 "Cannot retrieve locked instance %s" % self.op.instance_name
5641 _CheckNodeOnline(self, self.instance.primary_node)
5643 def Exec(self, feedback_fn):
5644 """Activate the disks.
5647 disks_ok, disks_info = \
5648 _AssembleInstanceDisks(self, self.instance,
5649 ignore_size=self.op.ignore_size)
5651 raise errors.OpExecError("Cannot activate block devices")
5656 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5658 """Prepare the block devices for an instance.
5660 This sets up the block devices on all nodes.
5662 @type lu: L{LogicalUnit}
5663 @param lu: the logical unit on whose behalf we execute
5664 @type instance: L{objects.Instance}
5665 @param instance: the instance for whose disks we assemble
5666 @type disks: list of L{objects.Disk} or None
5667 @param disks: which disks to assemble (or all, if None)
5668 @type ignore_secondaries: boolean
5669 @param ignore_secondaries: if true, errors on secondary nodes
5670 won't result in an error return from the function
5671 @type ignore_size: boolean
5672 @param ignore_size: if true, the current known size of the disk
5673 will not be used during the disk activation, useful for cases
5674 when the size is wrong
5675 @return: False if the operation failed, otherwise a list of
5676 (host, instance_visible_name, node_visible_name)
5677 with the mapping from node devices to instance devices
5682 iname = instance.name
5683 disks = _ExpandCheckDisks(instance, disks)
5685 # With the two-pass mechanism we try to reduce the window of
5686 # opportunity for the race condition of switching DRBD to primary
5687 # before handshaking has occurred, but we do not eliminate it
5689 # The proper fix would be to wait (with some limits) until the
5690 # connection has been made and drbd transitions from WFConnection
5691 # into any other network-connected state (Connected, SyncTarget,
5694 # 1st pass, assemble on all nodes in secondary mode
5695 for idx, inst_disk in enumerate(disks):
5696 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5698 node_disk = node_disk.Copy()
5699 node_disk.UnsetSize()
5700 lu.cfg.SetDiskID(node_disk, node)
5701 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5702 msg = result.fail_msg
5704 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5705 " (is_primary=False, pass=1): %s",
5706 inst_disk.iv_name, node, msg)
5707 if not ignore_secondaries:
5710 # FIXME: race condition on drbd migration to primary
5712 # 2nd pass, do only the primary node
5713 for idx, inst_disk in enumerate(disks):
5716 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5717 if node != instance.primary_node:
5720 node_disk = node_disk.Copy()
5721 node_disk.UnsetSize()
5722 lu.cfg.SetDiskID(node_disk, node)
5723 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5724 msg = result.fail_msg
5726 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5727 " (is_primary=True, pass=2): %s",
5728 inst_disk.iv_name, node, msg)
5731 dev_path = result.payload
5733 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5735 # leave the disks configured for the primary node
5736 # this is a workaround that would be fixed better by
5737 # improving the logical/physical id handling
5739 lu.cfg.SetDiskID(disk, instance.primary_node)
5741 return disks_ok, device_info
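# Illustrative sketch of the return value shape, assuming a hypothetical
# two-disk instance (node and device names below are made up):
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   # disks_ok    -> whether all required disks assembled successfully
#   # device_info -> [("node1.example.com", "disk/0", "/dev/drbd0"),
#   #                 ("node1.example.com", "disk/1", "/dev/drbd1")]
#   # i.e. (primary node, instance-visible disk name, device path), one
#   # entry per disk, as appended in the second (primary) pass above.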
5744 def _StartInstanceDisks(lu, instance, force):
5745 """Start the disks of an instance.
5748 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5749 ignore_secondaries=force)
5751 _ShutdownInstanceDisks(lu, instance)
5752 if force is not None and not force:
5753 lu.proc.LogWarning("", hint="If the message above refers to a"
5755 " secondary node, you can retry the operation using '--force'.")
5756 raise errors.OpExecError("Disk consistency error")
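# Note: _StartInstanceDisks maps the caller's "force" flag onto the
# ignore_secondaries parameter of _AssembleInstanceDisks, so a forced start
# tolerates assembly errors on secondary nodes; on any failure the partially
# assembled disks are shut down again before the OpExecError is raised.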
5759 class LUInstanceDeactivateDisks(NoHooksLU):
5760 """Shutdown an instance's disks.
5765 def ExpandNames(self):
5766 self._ExpandAndLockInstance()
5767 self.needed_locks[locking.LEVEL_NODE] = []
5768 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5770 def DeclareLocks(self, level):
5771 if level == locking.LEVEL_NODE:
5772 self._LockInstancesNodes()
5774 def CheckPrereq(self):
5775 """Check prerequisites.
5777 This checks that the instance is in the cluster.
5780 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5781 assert self.instance is not None, \
5782 "Cannot retrieve locked instance %s" % self.op.instance_name
5784 def Exec(self, feedback_fn):
5785 """Deactivate the disks
5788 instance = self.instance
5790 _ShutdownInstanceDisks(self, instance)
5792 _SafeShutdownInstanceDisks(self, instance)
5795 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5796 """Shutdown block devices of an instance.
5798 This function checks if an instance is running, before calling
5799 _ShutdownInstanceDisks.
5802 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5803 _ShutdownInstanceDisks(lu, instance, disks=disks)
5806 def _ExpandCheckDisks(instance, disks):
5807 """Return the instance disks selected by the disks list
5809 @type disks: list of L{objects.Disk} or None
5810 @param disks: selected disks
5811 @rtype: list of L{objects.Disk}
5812 @return: selected instance disks to act on
5816 return instance.disks
5818 if not set(disks).issubset(instance.disks):
5819 raise errors.ProgrammerError("Can only act on disks belonging to the"
5824 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5825 """Shutdown block devices of an instance.
5827 This does the shutdown on all nodes of the instance.
5829 If ignore_primary is false, errors on the primary node make the shutdown fail.
5834 disks = _ExpandCheckDisks(instance, disks)
5837 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5838 lu.cfg.SetDiskID(top_disk, node)
5839 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5840 msg = result.fail_msg
5842 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5843 disk.iv_name, node, msg)
5844 if ((node == instance.primary_node and not ignore_primary) or
5845 (node != instance.primary_node and not result.offline)):
5850 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5851 """Checks if a node has enough free memory.
5853 This function checks if a given node has the needed amount of free
5854 memory. In case the node has less memory or we cannot get the
5855 information from the node, this function raises an OpPrereqError
5858 @type lu: C{LogicalUnit}
5859 @param lu: a logical unit from which we get configuration data
5861 @param node: the node to check
5862 @type reason: C{str}
5863 @param reason: string to use in the error message
5864 @type requested: C{int}
5865 @param requested: the amount of memory in MiB to check for
5866 @type hypervisor_name: C{str}
5867 @param hypervisor_name: the hypervisor to ask for memory stats
5868 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5869 we cannot check the node
5872 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5873 nodeinfo[node].Raise("Can't get data from node %s" % node,
5874 prereq=True, ecode=errors.ECODE_ENVIRON)
5875 free_mem = nodeinfo[node].payload.get("memory_free", None)
5876 if not isinstance(free_mem, int):
5877 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5878 " was '%s'" % (node, free_mem),
5879 errors.ECODE_ENVIRON)
5880 if requested > free_mem:
5881 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5882 " needed %s MiB, available %s MiB" %
5883 (node, reason, requested, free_mem),
5887 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5888 """Checks if nodes have enough free disk space in all VGs.
5890 This function checks if all given nodes have the needed amount of
5891 free disk. In case any node has less disk or we cannot get the
5892 information from the node, this function raises an OpPrereqError
5895 @type lu: C{LogicalUnit}
5896 @param lu: a logical unit from which we get configuration data
5897 @type nodenames: C{list}
5898 @param nodenames: the list of node names to check
5899 @type req_sizes: C{dict}
5900 @param req_sizes: the hash of vg and corresponding amount of disk in
5902 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5903 or we cannot check the node
5906 for vg, req_size in req_sizes.items():
5907 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
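# Illustrative example (hypothetical node and volume group names): req_sizes
# maps each volume group to the total amount of disk space, in MiB, that must
# be free on every node in nodenames, e.g.
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240, "ssdvg": 2048})
#
# which performs one _CheckNodesFreeDiskOnVG call per volume group.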
5910 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5911 """Checks if nodes have enough free disk space in the specified VG.
5913 This function checks if all given nodes have the needed amount of
5914 free disk. In case any node has less disk or we cannot get the
5915 information from the node, this function raises an OpPrereqError
5918 @type lu: C{LogicalUnit}
5919 @param lu: a logical unit from which we get configuration data
5920 @type nodenames: C{list}
5921 @param nodenames: the list of node names to check
5923 @param vg: the volume group to check
5924 @type requested: C{int}
5925 @param requested: the amount of disk in MiB to check for
5926 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5927 or we cannot check the node
5930 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5931 for node in nodenames:
5932 info = nodeinfo[node]
5933 info.Raise("Cannot get current information from node %s" % node,
5934 prereq=True, ecode=errors.ECODE_ENVIRON)
5935 vg_free = info.payload.get("vg_free", None)
5936 if not isinstance(vg_free, int):
5937 raise errors.OpPrereqError("Can't compute free disk space on node"
5938 " %s for vg %s, result was '%s'" %
5939 (node, vg, vg_free), errors.ECODE_ENVIRON)
5940 if requested > vg_free:
5941 raise errors.OpPrereqError("Not enough disk space on target node %s"
5942 " vg %s: required %d MiB, available %d MiB" %
5943 (node, vg, requested, vg_free),
5947 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5948 """Checks if nodes have enough physical CPUs
5950 This function checks if all given nodes have the needed number of
5951 physical CPUs. In case any node has less CPUs or we cannot get the
5952 information from the node, this function raises an OpPrereqError
5955 @type lu: C{LogicalUnit}
5956 @param lu: a logical unit from which we get configuration data
5957 @type nodenames: C{list}
5958 @param nodenames: the list of node names to check
5959 @type requested: C{int}
5960 @param requested: the minimum acceptable number of physical CPUs
5961 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5962 or we cannot check the node
5965 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5966 for node in nodenames:
5967 info = nodeinfo[node]
5968 info.Raise("Cannot get current information from node %s" % node,
5969 prereq=True, ecode=errors.ECODE_ENVIRON)
5970 num_cpus = info.payload.get("cpu_total", None)
5971 if not isinstance(num_cpus, int):
5972 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5973 " on node %s, result was '%s'" %
5974 (node, num_cpus), errors.ECODE_ENVIRON)
5975 if requested > num_cpus:
5976 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5977 "required" % (node, num_cpus, requested),
5981 class LUInstanceStartup(LogicalUnit):
5982 """Starts an instance.
5985 HPATH = "instance-start"
5986 HTYPE = constants.HTYPE_INSTANCE
5989 def CheckArguments(self):
5991 if self.op.beparams:
5992 # fill the beparams dict
5993 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5995 def ExpandNames(self):
5996 self._ExpandAndLockInstance()
5998 def BuildHooksEnv(self):
6001 This runs on master, primary and secondary nodes of the instance.
6005 "FORCE": self.op.force,
6008 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6012 def BuildHooksNodes(self):
6013 """Build hooks nodes.
6016 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6019 def CheckPrereq(self):
6020 """Check prerequisites.
6022 This checks that the instance is in the cluster.
6025 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6026 assert self.instance is not None, \
6027 "Cannot retrieve locked instance %s" % self.op.instance_name
6030 if self.op.hvparams:
6031 # check hypervisor parameter syntax (locally)
6032 cluster = self.cfg.GetClusterInfo()
6033 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6034 filled_hvp = cluster.FillHV(instance)
6035 filled_hvp.update(self.op.hvparams)
6036 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6037 hv_type.CheckParameterSyntax(filled_hvp)
6038 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6040 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6042 if self.primary_offline and self.op.ignore_offline_nodes:
6043 self.proc.LogWarning("Ignoring offline primary node")
6045 if self.op.hvparams or self.op.beparams:
6046 self.proc.LogWarning("Overridden parameters are ignored")
6048 _CheckNodeOnline(self, instance.primary_node)
6050 bep = self.cfg.GetClusterInfo().FillBE(instance)
6052 # check bridges existence
6053 _CheckInstanceBridgesExist(self, instance)
6055 remote_info = self.rpc.call_instance_info(instance.primary_node,
6057 instance.hypervisor)
6058 remote_info.Raise("Error checking node %s" % instance.primary_node,
6059 prereq=True, ecode=errors.ECODE_ENVIRON)
6060 if not remote_info.payload: # not running already
6061 _CheckNodeFreeMemory(self, instance.primary_node,
6062 "starting instance %s" % instance.name,
6063 bep[constants.BE_MEMORY], instance.hypervisor)
6065 def Exec(self, feedback_fn):
6066 """Start the instance.
6069 instance = self.instance
6070 force = self.op.force
6072 if not self.op.no_remember:
6073 self.cfg.MarkInstanceUp(instance.name)
6075 if self.primary_offline:
6076 assert self.op.ignore_offline_nodes
6077 self.proc.LogInfo("Primary node offline, marked instance as started")
6079 node_current = instance.primary_node
6081 _StartInstanceDisks(self, instance, force)
6084 self.rpc.call_instance_start(node_current,
6085 (instance, self.op.hvparams,
6087 self.op.startup_paused)
6088 msg = result.fail_msg
6090 _ShutdownInstanceDisks(self, instance)
6091 raise errors.OpExecError("Could not start instance: %s" % msg)
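# Illustrative client-side sketch (assumed usage, hypothetical instance name):
# this LU is driven by an OpInstanceStartup opcode whose fields mirror the
# self.op attributes used above, roughly
#
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  force=False, ignore_offline_nodes=False)
#
# with hvparams/beparams overrides, no_remember and startup_paused optional.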
6094 class LUInstanceReboot(LogicalUnit):
6095 """Reboot an instance.
6098 HPATH = "instance-reboot"
6099 HTYPE = constants.HTYPE_INSTANCE
6102 def ExpandNames(self):
6103 self._ExpandAndLockInstance()
6105 def BuildHooksEnv(self):
6108 This runs on master, primary and secondary nodes of the instance.
6112 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6113 "REBOOT_TYPE": self.op.reboot_type,
6114 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6117 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6121 def BuildHooksNodes(self):
6122 """Build hooks nodes.
6125 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6128 def CheckPrereq(self):
6129 """Check prerequisites.
6131 This checks that the instance is in the cluster.
6134 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6135 assert self.instance is not None, \
6136 "Cannot retrieve locked instance %s" % self.op.instance_name
6138 _CheckNodeOnline(self, instance.primary_node)
6140 # check bridges existence
6141 _CheckInstanceBridgesExist(self, instance)
6143 def Exec(self, feedback_fn):
6144 """Reboot the instance.
6147 instance = self.instance
6148 ignore_secondaries = self.op.ignore_secondaries
6149 reboot_type = self.op.reboot_type
6151 remote_info = self.rpc.call_instance_info(instance.primary_node,
6153 instance.hypervisor)
6154 remote_info.Raise("Error checking node %s" % instance.primary_node)
6155 instance_running = bool(remote_info.payload)
6157 node_current = instance.primary_node
6159 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6160 constants.INSTANCE_REBOOT_HARD]:
6161 for disk in instance.disks:
6162 self.cfg.SetDiskID(disk, node_current)
6163 result = self.rpc.call_instance_reboot(node_current, instance,
6165 self.op.shutdown_timeout)
6166 result.Raise("Could not reboot instance")
6168 if instance_running:
6169 result = self.rpc.call_instance_shutdown(node_current, instance,
6170 self.op.shutdown_timeout)
6171 result.Raise("Could not shutdown instance for full reboot")
6172 _ShutdownInstanceDisks(self, instance)
6174 self.LogInfo("Instance %s was already stopped, starting now",
6176 _StartInstanceDisks(self, instance, ignore_secondaries)
6177 result = self.rpc.call_instance_start(node_current,
6178 (instance, None, None), False)
6179 msg = result.fail_msg
6181 _ShutdownInstanceDisks(self, instance)
6182 raise errors.OpExecError("Could not start instance for"
6183 " full reboot: %s" % msg)
6185 self.cfg.MarkInstanceUp(instance.name)
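# Note on reboot types: for a running instance, INSTANCE_REBOOT_SOFT and
# INSTANCE_REBOOT_HARD are delegated to the hypervisor via
# call_instance_reboot; any other reboot type, or a reboot of a stopped
# instance, is handled as a full cycle: shutdown (if running), deactivate
# the disks, reactivate them and start the instance again.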
6188 class LUInstanceShutdown(LogicalUnit):
6189 """Shutdown an instance.
6192 HPATH = "instance-stop"
6193 HTYPE = constants.HTYPE_INSTANCE
6196 def ExpandNames(self):
6197 self._ExpandAndLockInstance()
6199 def BuildHooksEnv(self):
6202 This runs on master, primary and secondary nodes of the instance.
6205 env = _BuildInstanceHookEnvByObject(self, self.instance)
6206 env["TIMEOUT"] = self.op.timeout
6209 def BuildHooksNodes(self):
6210 """Build hooks nodes.
6213 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6216 def CheckPrereq(self):
6217 """Check prerequisites.
6219 This checks that the instance is in the cluster.
6222 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6223 assert self.instance is not None, \
6224 "Cannot retrieve locked instance %s" % self.op.instance_name
6226 self.primary_offline = \
6227 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6229 if self.primary_offline and self.op.ignore_offline_nodes:
6230 self.proc.LogWarning("Ignoring offline primary node")
6232 _CheckNodeOnline(self, self.instance.primary_node)
6234 def Exec(self, feedback_fn):
6235 """Shutdown the instance.
6238 instance = self.instance
6239 node_current = instance.primary_node
6240 timeout = self.op.timeout
6242 if not self.op.no_remember:
6243 self.cfg.MarkInstanceDown(instance.name)
6245 if self.primary_offline:
6246 assert self.op.ignore_offline_nodes
6247 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6249 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6250 msg = result.fail_msg
6252 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6254 _ShutdownInstanceDisks(self, instance)
6257 class LUInstanceReinstall(LogicalUnit):
6258 """Reinstall an instance.
6261 HPATH = "instance-reinstall"
6262 HTYPE = constants.HTYPE_INSTANCE
6265 def ExpandNames(self):
6266 self._ExpandAndLockInstance()
6268 def BuildHooksEnv(self):
6271 This runs on master, primary and secondary nodes of the instance.
6274 return _BuildInstanceHookEnvByObject(self, self.instance)
6276 def BuildHooksNodes(self):
6277 """Build hooks nodes.
6280 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6283 def CheckPrereq(self):
6284 """Check prerequisites.
6286 This checks that the instance is in the cluster and is not running.
6289 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6290 assert instance is not None, \
6291 "Cannot retrieve locked instance %s" % self.op.instance_name
6292 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6293 " offline, cannot reinstall")
6294 for node in instance.secondary_nodes:
6295 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6296 " cannot reinstall")
6298 if instance.disk_template == constants.DT_DISKLESS:
6299 raise errors.OpPrereqError("Instance '%s' has no disks" %
6300 self.op.instance_name,
6302 _CheckInstanceDown(self, instance, "cannot reinstall")
6304 if self.op.os_type is not None:
6306 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6307 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6308 instance_os = self.op.os_type
6310 instance_os = instance.os
6312 nodelist = list(instance.all_nodes)
6314 if self.op.osparams:
6315 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6316 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6317 self.os_inst = i_osdict # the new dict (without defaults)
6321 self.instance = instance
6323 def Exec(self, feedback_fn):
6324 """Reinstall the instance.
6327 inst = self.instance
6329 if self.op.os_type is not None:
6330 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6331 inst.os = self.op.os_type
6332 # Write to configuration
6333 self.cfg.Update(inst, feedback_fn)
6335 _StartInstanceDisks(self, inst, None)
6337 feedback_fn("Running the instance OS create scripts...")
6338 # FIXME: pass debug option from opcode to backend
6339 result = self.rpc.call_instance_os_add(inst.primary_node,
6340 (inst, self.os_inst), True,
6341 self.op.debug_level)
6342 result.Raise("Could not install OS for instance %s on node %s" %
6343 (inst.name, inst.primary_node))
6345 _ShutdownInstanceDisks(self, inst)
6348 class LUInstanceRecreateDisks(LogicalUnit):
6349 """Recreate an instance's missing disks.
6352 HPATH = "instance-recreate-disks"
6353 HTYPE = constants.HTYPE_INSTANCE
6356 def CheckArguments(self):
6357 # normalise the disk list
6358 self.op.disks = sorted(frozenset(self.op.disks))
6360 def ExpandNames(self):
6361 self._ExpandAndLockInstance()
6362 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6364 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6365 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6367 self.needed_locks[locking.LEVEL_NODE] = []
6369 def DeclareLocks(self, level):
6370 if level == locking.LEVEL_NODE:
6371 # if we replace the nodes, we only need to lock the old primary,
6372 # otherwise we need to lock all nodes for disk re-creation
6373 primary_only = bool(self.op.nodes)
6374 self._LockInstancesNodes(primary_only=primary_only)
6376 def BuildHooksEnv(self):
6379 This runs on master, primary and secondary nodes of the instance.
6382 return _BuildInstanceHookEnvByObject(self, self.instance)
6384 def BuildHooksNodes(self):
6385 """Build hooks nodes.
6388 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6391 def CheckPrereq(self):
6392 """Check prerequisites.
6394 This checks that the instance is in the cluster and is not running.
6397 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6398 assert instance is not None, \
6399 "Cannot retrieve locked instance %s" % self.op.instance_name
6401 if len(self.op.nodes) != len(instance.all_nodes):
6402 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6403 " %d replacement nodes were specified" %
6404 (instance.name, len(instance.all_nodes),
6405 len(self.op.nodes)),
6407 assert instance.disk_template != constants.DT_DRBD8 or \
6408 len(self.op.nodes) == 2
6409 assert instance.disk_template != constants.DT_PLAIN or \
6410 len(self.op.nodes) == 1
6411 primary_node = self.op.nodes[0]
6413 primary_node = instance.primary_node
6414 _CheckNodeOnline(self, primary_node)
6416 if instance.disk_template == constants.DT_DISKLESS:
6417 raise errors.OpPrereqError("Instance '%s' has no disks" %
6418 self.op.instance_name, errors.ECODE_INVAL)
6419 # if we replace nodes *and* the old primary is offline, we don't
6421 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6422 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6423 if not (self.op.nodes and old_pnode.offline):
6424 _CheckInstanceDown(self, instance, "cannot recreate disks")
6426 if not self.op.disks:
6427 self.op.disks = range(len(instance.disks))
6429 for idx in self.op.disks:
6430 if idx >= len(instance.disks):
6431 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6433 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6434 raise errors.OpPrereqError("Can't recreate disks partially and"
6435 " change the nodes at the same time",
6437 self.instance = instance
6439 def Exec(self, feedback_fn):
6440 """Recreate the disks.
6443 instance = self.instance
6446 mods = [] # keeps track of needed logical_id changes
6448 for idx, disk in enumerate(instance.disks):
6449 if idx not in self.op.disks: # disk idx has not been passed in
6452 # update secondaries for disks, if needed
6454 if disk.dev_type == constants.LD_DRBD8:
6455 # need to update the nodes and minors
6456 assert len(self.op.nodes) == 2
6457 assert len(disk.logical_id) == 6 # otherwise disk internals
6459 (_, _, old_port, _, _, old_secret) = disk.logical_id
6460 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6461 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6462 new_minors[0], new_minors[1], old_secret)
6463 assert len(disk.logical_id) == len(new_id)
6464 mods.append((idx, new_id))
6466 # now that we have passed all asserts above, we can apply the mods
6467 # in a single run (to avoid partial changes)
6468 for idx, new_id in mods:
6469 instance.disks[idx].logical_id = new_id
6471 # change primary node, if needed
6473 instance.primary_node = self.op.nodes[0]
6474 self.LogWarning("Changing the instance's nodes, you will have to"
6475 " remove any disks left on the older nodes manually")
6478 self.cfg.Update(instance, feedback_fn)
6480 _CreateDisks(self, instance, to_skip=to_skip)
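# Illustrative note (values below are hypothetical): for DRBD8 disks the
# logical_id rewritten above is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret), so recreating on new nodes
# only swaps the node names and the freshly allocated minors while the port
# and the shared secret are preserved, e.g.
#
#   ("node3.example.com", "node4.example.com", 11000, 0, 1, "0123abcd")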
6483 class LUInstanceRename(LogicalUnit):
6484 """Rename an instance.
6487 HPATH = "instance-rename"
6488 HTYPE = constants.HTYPE_INSTANCE
6490 def CheckArguments(self):
6494 if self.op.ip_check and not self.op.name_check:
6495 # TODO: make the ip check more flexible and not depend on the name check
6496 raise errors.OpPrereqError("IP address check requires a name check",
6499 def BuildHooksEnv(self):
6502 This runs on master, primary and secondary nodes of the instance.
6505 env = _BuildInstanceHookEnvByObject(self, self.instance)
6506 env["INSTANCE_NEW_NAME"] = self.op.new_name
6509 def BuildHooksNodes(self):
6510 """Build hooks nodes.
6513 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6516 def CheckPrereq(self):
6517 """Check prerequisites.
6519 This checks that the instance is in the cluster and is not running.
6522 self.op.instance_name = _ExpandInstanceName(self.cfg,
6523 self.op.instance_name)
6524 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6525 assert instance is not None
6526 _CheckNodeOnline(self, instance.primary_node)
6527 _CheckInstanceDown(self, instance, "cannot rename")
6528 self.instance = instance
6530 new_name = self.op.new_name
6531 if self.op.name_check:
6532 hostname = netutils.GetHostname(name=new_name)
6533 if hostname != new_name:
6534 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6536 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6537 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6538 " same as given hostname '%s'") %
6539 (hostname.name, self.op.new_name),
6541 new_name = self.op.new_name = hostname.name
6542 if (self.op.ip_check and
6543 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6544 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6545 (hostname.ip, new_name),
6546 errors.ECODE_NOTUNIQUE)
6548 instance_list = self.cfg.GetInstanceList()
6549 if new_name in instance_list and new_name != instance.name:
6550 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6551 new_name, errors.ECODE_EXISTS)
6553 def Exec(self, feedback_fn):
6554 """Rename the instance.
6557 inst = self.instance
6558 old_name = inst.name
6560 rename_file_storage = False
6561 if (inst.disk_template in constants.DTS_FILEBASED and
6562 self.op.new_name != inst.name):
6563 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6564 rename_file_storage = True
6566 self.cfg.RenameInstance(inst.name, self.op.new_name)
6567 # Change the instance lock. This is definitely safe while we hold the BGL.
6568 # Otherwise the new lock would have to be added in acquired mode.
6570 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6571 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6573 # re-read the instance from the configuration after rename
6574 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6576 if rename_file_storage:
6577 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6578 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6579 old_file_storage_dir,
6580 new_file_storage_dir)
6581 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6582 " (but the instance has been renamed in Ganeti)" %
6583 (inst.primary_node, old_file_storage_dir,
6584 new_file_storage_dir))
6586 _StartInstanceDisks(self, inst, None)
6588 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6589 old_name, self.op.debug_level)
6590 msg = result.fail_msg
6592 msg = ("Could not run OS rename script for instance %s on node %s"
6593 " (but the instance has been renamed in Ganeti): %s" %
6594 (inst.name, inst.primary_node, msg))
6595 self.proc.LogWarning(msg)
6597 _ShutdownInstanceDisks(self, inst)
6602 class LUInstanceRemove(LogicalUnit):
6603 """Remove an instance.
6606 HPATH = "instance-remove"
6607 HTYPE = constants.HTYPE_INSTANCE
6610 def ExpandNames(self):
6611 self._ExpandAndLockInstance()
6612 self.needed_locks[locking.LEVEL_NODE] = []
6613 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6615 def DeclareLocks(self, level):
6616 if level == locking.LEVEL_NODE:
6617 self._LockInstancesNodes()
6619 def BuildHooksEnv(self):
6622 This runs on master, primary and secondary nodes of the instance.
6625 env = _BuildInstanceHookEnvByObject(self, self.instance)
6626 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6629 def BuildHooksNodes(self):
6630 """Build hooks nodes.
6633 nl = [self.cfg.GetMasterNode()]
6634 nl_post = list(self.instance.all_nodes) + nl
6635 return (nl, nl_post)
6637 def CheckPrereq(self):
6638 """Check prerequisites.
6640 This checks that the instance is in the cluster.
6643 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6644 assert self.instance is not None, \
6645 "Cannot retrieve locked instance %s" % self.op.instance_name
6647 def Exec(self, feedback_fn):
6648 """Remove the instance.
6651 instance = self.instance
6652 logging.info("Shutting down instance %s on node %s",
6653 instance.name, instance.primary_node)
6655 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6656 self.op.shutdown_timeout)
6657 msg = result.fail_msg
6659 if self.op.ignore_failures:
6660 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6662 raise errors.OpExecError("Could not shutdown instance %s on"
6664 (instance.name, instance.primary_node, msg))
6666 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6669 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6670 """Utility function to remove an instance.
6673 logging.info("Removing block devices for instance %s", instance.name)
6675 if not _RemoveDisks(lu, instance):
6676 if not ignore_failures:
6677 raise errors.OpExecError("Can't remove instance's disks")
6678 feedback_fn("Warning: can't remove instance's disks")
6680 logging.info("Removing instance %s out of cluster config", instance.name)
6682 lu.cfg.RemoveInstance(instance.name)
6684 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6685 "Instance lock removal conflict"
6687 # Remove lock for the instance
6688 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6691 class LUInstanceQuery(NoHooksLU):
6692 """Logical unit for querying instances.
6695 # pylint: disable=W0142
6698 def CheckArguments(self):
6699 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6700 self.op.output_fields, self.op.use_locking)
6702 def ExpandNames(self):
6703 self.iq.ExpandNames(self)
6705 def DeclareLocks(self, level):
6706 self.iq.DeclareLocks(self, level)
6708 def Exec(self, feedback_fn):
6709 return self.iq.OldStyleQuery(self)
6712 class LUInstanceFailover(LogicalUnit):
6713 """Failover an instance.
6716 HPATH = "instance-failover"
6717 HTYPE = constants.HTYPE_INSTANCE
6720 def CheckArguments(self):
6721 """Check the arguments.
6724 self.iallocator = getattr(self.op, "iallocator", None)
6725 self.target_node = getattr(self.op, "target_node", None)
6727 def ExpandNames(self):
6728 self._ExpandAndLockInstance()
6730 if self.op.target_node is not None:
6731 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6733 self.needed_locks[locking.LEVEL_NODE] = []
6734 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6736 ignore_consistency = self.op.ignore_consistency
6737 shutdown_timeout = self.op.shutdown_timeout
6738 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6741 ignore_consistency=ignore_consistency,
6742 shutdown_timeout=shutdown_timeout)
6743 self.tasklets = [self._migrater]
6745 def DeclareLocks(self, level):
6746 if level == locking.LEVEL_NODE:
6747 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6748 if instance.disk_template in constants.DTS_EXT_MIRROR:
6749 if self.op.target_node is None:
6750 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6752 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6753 self.op.target_node]
6754 del self.recalculate_locks[locking.LEVEL_NODE]
6756 self._LockInstancesNodes()
6758 def BuildHooksEnv(self):
6761 This runs on master, primary and secondary nodes of the instance.
6764 instance = self._migrater.instance
6765 source_node = instance.primary_node
6766 target_node = self.op.target_node
6768 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6769 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6770 "OLD_PRIMARY": source_node,
6771 "NEW_PRIMARY": target_node,
6774 if instance.disk_template in constants.DTS_INT_MIRROR:
6775 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6776 env["NEW_SECONDARY"] = source_node
6778 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6780 env.update(_BuildInstanceHookEnvByObject(self, instance))
6784 def BuildHooksNodes(self):
6785 """Build hooks nodes.
6788 instance = self._migrater.instance
6789 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6790 return (nl, nl + [instance.primary_node])
6793 class LUInstanceMigrate(LogicalUnit):
6794 """Migrate an instance.
6796 This is migration without shutting down the instance, as opposed to
6797 failover, which is done with a shutdown.
6800 HPATH = "instance-migrate"
6801 HTYPE = constants.HTYPE_INSTANCE
6804 def ExpandNames(self):
6805 self._ExpandAndLockInstance()
6807 if self.op.target_node is not None:
6808 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6810 self.needed_locks[locking.LEVEL_NODE] = []
6811 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6813 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6814 cleanup=self.op.cleanup,
6816 fallback=self.op.allow_failover)
6817 self.tasklets = [self._migrater]
6819 def DeclareLocks(self, level):
6820 if level == locking.LEVEL_NODE:
6821 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6822 if instance.disk_template in constants.DTS_EXT_MIRROR:
6823 if self.op.target_node is None:
6824 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6826 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6827 self.op.target_node]
6828 del self.recalculate_locks[locking.LEVEL_NODE]
6830 self._LockInstancesNodes()
6832 def BuildHooksEnv(self):
6835 This runs on master, primary and secondary nodes of the instance.
6838 instance = self._migrater.instance
6839 source_node = instance.primary_node
6840 target_node = self.op.target_node
6841 env = _BuildInstanceHookEnvByObject(self, instance)
6843 "MIGRATE_LIVE": self._migrater.live,
6844 "MIGRATE_CLEANUP": self.op.cleanup,
6845 "OLD_PRIMARY": source_node,
6846 "NEW_PRIMARY": target_node,
6849 if instance.disk_template in constants.DTS_INT_MIRROR:
6850 env["OLD_SECONDARY"] = target_node
6851 env["NEW_SECONDARY"] = source_node
6853 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6857 def BuildHooksNodes(self):
6858 """Build hooks nodes.
6861 instance = self._migrater.instance
6862 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6863 return (nl, nl + [instance.primary_node])
6866 class LUInstanceMove(LogicalUnit):
6867 """Move an instance by data-copying.
6870 HPATH = "instance-move"
6871 HTYPE = constants.HTYPE_INSTANCE
6874 def ExpandNames(self):
6875 self._ExpandAndLockInstance()
6876 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6877 self.op.target_node = target_node
6878 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6879 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6881 def DeclareLocks(self, level):
6882 if level == locking.LEVEL_NODE:
6883 self._LockInstancesNodes(primary_only=True)
6885 def BuildHooksEnv(self):
6888 This runs on master, primary and secondary nodes of the instance.
6892 "TARGET_NODE": self.op.target_node,
6893 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6895 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6898 def BuildHooksNodes(self):
6899 """Build hooks nodes.
6903 self.cfg.GetMasterNode(),
6904 self.instance.primary_node,
6905 self.op.target_node,
6909 def CheckPrereq(self):
6910 """Check prerequisites.
6912 This checks that the instance is in the cluster.
6915 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6916 assert self.instance is not None, \
6917 "Cannot retrieve locked instance %s" % self.op.instance_name
6919 node = self.cfg.GetNodeInfo(self.op.target_node)
6920 assert node is not None, \
6921 "Cannot retrieve locked node %s" % self.op.target_node
6923 self.target_node = target_node = node.name
6925 if target_node == instance.primary_node:
6926 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6927 (instance.name, target_node),
6930 bep = self.cfg.GetClusterInfo().FillBE(instance)
6932 for idx, dsk in enumerate(instance.disks):
6933 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6934 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6935 " cannot copy" % idx, errors.ECODE_STATE)
6937 _CheckNodeOnline(self, target_node)
6938 _CheckNodeNotDrained(self, target_node)
6939 _CheckNodeVmCapable(self, target_node)
6941 if instance.admin_up:
6942 # check memory requirements on the secondary node
6943 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6944 instance.name, bep[constants.BE_MEMORY],
6945 instance.hypervisor)
6947 self.LogInfo("Not checking memory on the secondary node as"
6948 " instance will not be started")
6950 # check bridge existence
6951 _CheckInstanceBridgesExist(self, instance, node=target_node)
6953 def Exec(self, feedback_fn):
6954 """Move an instance.
6956 The move is done by shutting it down on its present node, copying
6957 the data over (slow) and starting it on the new node.
6960 instance = self.instance
6962 source_node = instance.primary_node
6963 target_node = self.target_node
6965 self.LogInfo("Shutting down instance %s on source node %s",
6966 instance.name, source_node)
6968 result = self.rpc.call_instance_shutdown(source_node, instance,
6969 self.op.shutdown_timeout)
6970 msg = result.fail_msg
6972 if self.op.ignore_consistency:
6973 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6974 " Proceeding anyway. Please make sure node"
6975 " %s is down. Error details: %s",
6976 instance.name, source_node, source_node, msg)
6978 raise errors.OpExecError("Could not shutdown instance %s on"
6980 (instance.name, source_node, msg))
6982 # create the target disks
6984 _CreateDisks(self, instance, target_node=target_node)
6985 except errors.OpExecError:
6986 self.LogWarning("Device creation failed, reverting...")
6988 _RemoveDisks(self, instance, target_node=target_node)
6990 self.cfg.ReleaseDRBDMinors(instance.name)
6993 cluster_name = self.cfg.GetClusterInfo().cluster_name
6996 # activate, get path, copy the data over
6997 for idx, disk in enumerate(instance.disks):
6998 self.LogInfo("Copying data for disk %d", idx)
6999 result = self.rpc.call_blockdev_assemble(target_node, disk,
7000 instance.name, True, idx)
7002 self.LogWarning("Can't assemble newly created disk %d: %s",
7003 idx, result.fail_msg)
7004 errs.append(result.fail_msg)
7006 dev_path = result.payload
7007 result = self.rpc.call_blockdev_export(source_node, disk,
7008 target_node, dev_path,
7011 self.LogWarning("Can't copy data over for disk %d: %s",
7012 idx, result.fail_msg)
7013 errs.append(result.fail_msg)
7017 self.LogWarning("Some disks failed to copy, aborting")
7019 _RemoveDisks(self, instance, target_node=target_node)
7021 self.cfg.ReleaseDRBDMinors(instance.name)
7022 raise errors.OpExecError("Errors during disk copy: %s" %
7025 instance.primary_node = target_node
7026 self.cfg.Update(instance, feedback_fn)
7028 self.LogInfo("Removing the disks on the original node")
7029 _RemoveDisks(self, instance, target_node=source_node)
7031 # Only start the instance if it's marked as up
7032 if instance.admin_up:
7033 self.LogInfo("Starting instance %s on node %s",
7034 instance.name, target_node)
7036 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7037 ignore_secondaries=True)
7039 _ShutdownInstanceDisks(self, instance)
7040 raise errors.OpExecError("Can't activate the instance's disks")
7042 result = self.rpc.call_instance_start(target_node,
7043 (instance, None, None), False)
7044 msg = result.fail_msg
7046 _ShutdownInstanceDisks(self, instance)
7047 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7048 (instance.name, target_node, msg))
7051 class LUNodeMigrate(LogicalUnit):
7052 """Migrate all instances from a node.
7055 HPATH = "node-migrate"
7056 HTYPE = constants.HTYPE_NODE
7059 def CheckArguments(self):
7062 def ExpandNames(self):
7063 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7065 self.share_locks = _ShareAll()
7066 self.needed_locks = {
7067 locking.LEVEL_NODE: [self.op.node_name],
7070 def BuildHooksEnv(self):
7073 This runs on the master, the primary and all the secondaries.
7077 "NODE_NAME": self.op.node_name,
7080 def BuildHooksNodes(self):
7081 """Build hooks nodes.
7084 nl = [self.cfg.GetMasterNode()]
7087 def CheckPrereq(self):
7090 def Exec(self, feedback_fn):
7091 # Prepare jobs for migration instances
7093 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7096 iallocator=self.op.iallocator,
7097 target_node=self.op.target_node)]
7098 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7101 # TODO: Run iallocator in this opcode and pass correct placement options to
7102 # OpInstanceMigrate. Since other jobs can modify the cluster between
7103 # running the iallocator and the actual migration, a good consistency model
7104 # will have to be found.
7106 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7107 frozenset([self.op.node_name]))
7109 return ResultWithJobs(jobs)
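# Note: the jobs value built above is a list of single-opcode job definitions,
# one OpInstanceMigrate per primary instance of the node being evacuated, so
# each migration runs as its own job and can succeed or fail independently.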
7112 class TLMigrateInstance(Tasklet):
7113 """Tasklet class for instance migration.
7116 @ivar live: whether the migration will be done live or non-live;
7117 this variable is initialized only after CheckPrereq has run
7118 @type cleanup: boolean
7119 @ivar cleanup: Whether we clean up from a failed migration
7120 @type iallocator: string
7121 @ivar iallocator: The iallocator used to determine target_node
7122 @type target_node: string
7123 @ivar target_node: If given, the target_node to reallocate the instance to
7124 @type failover: boolean
7125 @ivar failover: Whether operation results in failover or migration
7126 @type fallback: boolean
7127 @ivar fallback: Whether fallback to failover is allowed if migration not
7129 @type ignore_consistency: boolean
7130 @ivar ignore_consistency: Whether we should ignore consistency between source
7132 @type shutdown_timeout: int
7133 @ivar shutdown_timeout: timeout to use for the instance shutdown in case of failover
7138 _MIGRATION_POLL_INTERVAL = 1 # seconds
7139 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7141 def __init__(self, lu, instance_name, cleanup=False,
7142 failover=False, fallback=False,
7143 ignore_consistency=False,
7144 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7145 """Initializes this class.
7148 Tasklet.__init__(self, lu)
7151 self.instance_name = instance_name
7152 self.cleanup = cleanup
7153 self.live = False # will be overridden later
7154 self.failover = failover
7155 self.fallback = fallback
7156 self.ignore_consistency = ignore_consistency
7157 self.shutdown_timeout = shutdown_timeout
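# Illustrative instantiation (as done by the owning LUs above; the exact
# keyword values shown are only examples): a failover-style use would look
# roughly like
#
#   TLMigrateInstance(self, self.op.instance_name, failover=True,
#                     ignore_consistency=self.op.ignore_consistency,
#                     shutdown_timeout=self.op.shutdown_timeout)
#
# while LUInstanceMigrate passes cleanup=self.op.cleanup and
# fallback=self.op.allow_failover instead.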
7159 def CheckPrereq(self):
7160 """Check prerequisites.
7162 This checks that the instance is in the cluster.
7165 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7166 instance = self.cfg.GetInstanceInfo(instance_name)
7167 assert instance is not None
7168 self.instance = instance
7170 if (not self.cleanup and not instance.admin_up and not self.failover and
7172 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7174 self.failover = True
7176 if instance.disk_template not in constants.DTS_MIRRORED:
7181 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7182 " %s" % (instance.disk_template, text),
7185 if instance.disk_template in constants.DTS_EXT_MIRROR:
7186 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7188 if self.lu.op.iallocator:
7189 self._RunAllocator()
7191 # We set self.target_node as it is required by
7193 self.target_node = self.lu.op.target_node
7195 # self.target_node is already populated, either directly or by the
7197 target_node = self.target_node
7198 if self.target_node == instance.primary_node:
7199 raise errors.OpPrereqError("Cannot migrate instance %s"
7200 " to its primary (%s)" %
7201 (instance.name, instance.primary_node))
7203 if len(self.lu.tasklets) == 1:
7204 # It is safe to release locks only when we're the only tasklet
7206 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7207 keep=[instance.primary_node, self.target_node])
7210 secondary_nodes = instance.secondary_nodes
7211 if not secondary_nodes:
7212 raise errors.ConfigurationError("No secondary node but using"
7213 " %s disk template" %
7214 instance.disk_template)
7215 target_node = secondary_nodes[0]
7216 if self.lu.op.iallocator or (self.lu.op.target_node and
7217 self.lu.op.target_node != target_node):
7219 text = "failed over"
7222 raise errors.OpPrereqError("Instances with disk template %s cannot"
7223 " be %s to arbitrary nodes"
7224 " (neither an iallocator nor a target"
7225 " node can be passed)" %
7226 (instance.disk_template, text),
7229 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7231 # check memory requirements on the secondary node
7232 if not self.failover or instance.admin_up:
7233 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7234 instance.name, i_be[constants.BE_MEMORY],
7235 instance.hypervisor)
7237 self.lu.LogInfo("Not checking memory on the secondary node as"
7238 " instance will not be started")
7240 # check bridge existance
7241 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7243 if not self.cleanup:
7244 _CheckNodeNotDrained(self.lu, target_node)
7245 if not self.failover:
7246 result = self.rpc.call_instance_migratable(instance.primary_node,
7248 if result.fail_msg and self.fallback:
7249 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7251 self.failover = True
7253 result.Raise("Can't migrate, please use failover",
7254 prereq=True, ecode=errors.ECODE_STATE)
7256 assert not (self.failover and self.cleanup)
7258 if not self.failover:
7259 if self.lu.op.live is not None and self.lu.op.mode is not None:
7260 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7261 " parameters is accepted",
7263 if self.lu.op.live is not None:
7265 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7267 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7268 # reset the 'live' parameter to None so that repeated
7269 # invocations of CheckPrereq do not raise an exception
7270 self.lu.op.live = None
7271 elif self.lu.op.mode is None:
7272 # read the default value from the hypervisor
7273 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7275 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7277 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7279 # Failover is never live
7282 def _RunAllocator(self):
7283 """Run the allocator based on input opcode.
7286 ial = IAllocator(self.cfg, self.rpc,
7287 mode=constants.IALLOCATOR_MODE_RELOC,
7288 name=self.instance_name,
7289 # TODO See why hail breaks with a single node below
7290 relocate_from=[self.instance.primary_node,
7291 self.instance.primary_node],
7294 ial.Run(self.lu.op.iallocator)
7297 raise errors.OpPrereqError("Can't compute nodes using"
7298 " iallocator '%s': %s" %
7299 (self.lu.op.iallocator, ial.info),
7301 if len(ial.result) != ial.required_nodes:
7302 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7303 " of nodes (%s), required %s" %
7304 (self.lu.op.iallocator, len(ial.result),
7305 ial.required_nodes), errors.ECODE_FAULT)
7306 self.target_node = ial.result[0]
7307 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7308 self.instance_name, self.lu.op.iallocator,
7309 utils.CommaJoin(ial.result))
7311 def _WaitUntilSync(self):
7312 """Poll with custom rpc for disk sync.
7314 This uses our own step-based rpc call.
7317 self.feedback_fn("* wait until resync is done")
7321 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7323 self.instance.disks)
7325 for node, nres in result.items():
7326 nres.Raise("Cannot resync disks on node %s" % node)
7327 node_done, node_percent = nres.payload
7328 all_done = all_done and node_done
7329 if node_percent is not None:
7330 min_percent = min(min_percent, node_percent)
7332 if min_percent < 100:
7333 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7336 def _EnsureSecondary(self, node):
7337 """Demote a node to secondary.
7340 self.feedback_fn("* switching node %s to secondary mode" % node)
7342 for dev in self.instance.disks:
7343 self.cfg.SetDiskID(dev, node)
7345 result = self.rpc.call_blockdev_close(node, self.instance.name,
7346 self.instance.disks)
7347 result.Raise("Cannot change disk to secondary on node %s" % node)
7349 def _GoStandalone(self):
7350 """Disconnect from the network.
7353 self.feedback_fn("* changing into standalone mode")
7354 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7355 self.instance.disks)
7356 for node, nres in result.items():
7357 nres.Raise("Cannot disconnect disks node %s" % node)
7359 def _GoReconnect(self, multimaster):
7360 """Reconnect to the network.
7366 msg = "single-master"
7367 self.feedback_fn("* changing disks into %s mode" % msg)
7368 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7369 self.instance.disks,
7370 self.instance.name, multimaster)
7371 for node, nres in result.items():
7372 nres.Raise("Cannot change disks config on node %s" % node)
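# Note: _GoStandalone/_GoReconnect implement the DRBD network dance used by
# the migration and cleanup code below: disconnect all disks from the
# network, then re-attach them either in dual-master mode (multimaster=True,
# needed while both nodes may run the instance) or back in single-master mode
# once only one primary remains.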
7374 def _ExecCleanup(self):
7375 """Try to cleanup after a failed migration.
7377 The cleanup is done by:
7378 - check that the instance is running only on one node
7379 (and update the config if needed)
7380 - change disks on its secondary node to secondary
7381 - wait until disks are fully synchronized
7382 - disconnect from the network
7383 - change disks into single-master mode
7384 - wait again until disks are fully synchronized
7387 instance = self.instance
7388 target_node = self.target_node
7389 source_node = self.source_node
7391 # check running on only one node
7392 self.feedback_fn("* checking where the instance actually runs"
7393 " (if this hangs, the hypervisor might be in"
7395 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7396 for node, result in ins_l.items():
7397 result.Raise("Can't contact node %s" % node)
7399 runningon_source = instance.name in ins_l[source_node].payload
7400 runningon_target = instance.name in ins_l[target_node].payload
7402 if runningon_source and runningon_target:
7403 raise errors.OpExecError("Instance seems to be running on two nodes,"
7404 " or the hypervisor is confused; you will have"
7405 " to ensure manually that it runs only on one"
7406 " and restart this operation")
7408 if not (runningon_source or runningon_target):
7409 raise errors.OpExecError("Instance does not seem to be running at all;"
7410 " in this case it's safer to repair by"
7411 " running 'gnt-instance stop' to ensure disk"
7412 " shutdown, and then restarting it")
7414 if runningon_target:
7415 # the migration has actually succeeded, we need to update the config
7416 self.feedback_fn("* instance running on secondary node (%s),"
7417 " updating config" % target_node)
7418 instance.primary_node = target_node
7419 self.cfg.Update(instance, self.feedback_fn)
7420 demoted_node = source_node
7422 self.feedback_fn("* instance confirmed to be running on its"
7423 " primary node (%s)" % source_node)
7424 demoted_node = target_node
7426 if instance.disk_template in constants.DTS_INT_MIRROR:
7427 self._EnsureSecondary(demoted_node)
7429 self._WaitUntilSync()
7430 except errors.OpExecError:
7431 # we ignore here errors, since if the device is standalone, it
7432 # won't be able to sync
7434 self._GoStandalone()
7435 self._GoReconnect(False)
7436 self._WaitUntilSync()
7438 self.feedback_fn("* done")
7440 def _RevertDiskStatus(self):
7441 """Try to revert the disk status after a failed migration.
7444 target_node = self.target_node
7445 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7449 self._EnsureSecondary(target_node)
7450 self._GoStandalone()
7451 self._GoReconnect(False)
7452 self._WaitUntilSync()
7453 except errors.OpExecError, err:
7454 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7455 " please try to recover the instance manually;"
7456 " error '%s'" % str(err))
7458 def _AbortMigration(self):
7459 """Call the hypervisor code to abort a started migration.
7462 instance = self.instance
7463 target_node = self.target_node
7464 source_node = self.source_node
7465 migration_info = self.migration_info
7467 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7471 abort_msg = abort_result.fail_msg
7473 logging.error("Aborting migration failed on target node %s: %s",
7474 target_node, abort_msg)
7475 # Don't raise an exception here, as we still have to try to revert the
7476 # disk status, even if this step failed.
7478 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7479 instance, False, self.live)
7480 abort_msg = abort_result.fail_msg
7482 logging.error("Aborting migration failed on source node %s: %s",
7483 source_node, abort_msg)
7485 def _ExecMigration(self):
7486 """Migrate an instance.
7488 The migration is done by:
7489 - change the disks into dual-master mode
7490 - wait until disks are fully synchronized again
7491 - migrate the instance
7492 - change disks on the new secondary node (the old primary) to secondary
7493 - wait until disks are fully synchronized
7494 - change disks into single-master mode
7497 instance = self.instance
7498 target_node = self.target_node
7499 source_node = self.source_node
7501 # Check for hypervisor version mismatch and warn the user.
7502 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7503 None, self.instance.hypervisor)
7504 src_info = nodeinfo[source_node]
7505 dst_info = nodeinfo[target_node]
7507 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7508 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7509 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7510 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7511 if src_version != dst_version:
7512 self.feedback_fn("* warning: hypervisor version mismatch between"
7513 " source (%s) and target (%s) node" %
7514 (src_version, dst_version))
7516 self.feedback_fn("* checking disk consistency between source and target")
7517 for dev in instance.disks:
7518 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7519 raise errors.OpExecError("Disk %s is degraded or not fully"
7520 " synchronized on target node,"
7521 " aborting migration" % dev.iv_name)
7523 # First get the migration information from the remote node
7524 result = self.rpc.call_migration_info(source_node, instance)
7525 msg = result.fail_msg
7527 log_err = ("Failed fetching source migration information from %s: %s" %
7529 logging.error(log_err)
7530 raise errors.OpExecError(log_err)
7532 self.migration_info = migration_info = result.payload
7534 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7535 # Then switch the disks to master/master mode
7536 self._EnsureSecondary(target_node)
7537 self._GoStandalone()
7538 self._GoReconnect(True)
7539 self._WaitUntilSync()
7541 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7542 result = self.rpc.call_accept_instance(target_node,
7545 self.nodes_ip[target_node])
7547 msg = result.fail_msg
7549 logging.error("Instance pre-migration failed, trying to revert"
7550 " disk status: %s", msg)
7551 self.feedback_fn("Pre-migration failed, aborting")
7552 self._AbortMigration()
7553 self._RevertDiskStatus()
7554 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7555 (instance.name, msg))
7557 self.feedback_fn("* migrating instance to %s" % target_node)
7558 result = self.rpc.call_instance_migrate(source_node, instance,
7559 self.nodes_ip[target_node],
7561 msg = result.fail_msg
7563 logging.error("Instance migration failed, trying to revert"
7564 " disk status: %s", msg)
7565 self.feedback_fn("Migration failed, aborting")
7566 self._AbortMigration()
7567 self._RevertDiskStatus()
7568 raise errors.OpExecError("Could not migrate instance %s: %s" %
7569 (instance.name, msg))
7571 self.feedback_fn("* starting memory transfer")
7572 last_feedback = time.time()
7574 result = self.rpc.call_instance_get_migration_status(source_node,
7576 msg = result.fail_msg
7577 ms = result.payload # MigrationStatus instance
7578 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7579 logging.error("Instance migration failed, trying to revert"
7580 " disk status: %s", msg)
7581 self.feedback_fn("Migration failed, aborting")
7582 self._AbortMigration()
7583 self._RevertDiskStatus()
7584 raise errors.OpExecError("Could not migrate instance %s: %s" %
7585 (instance.name, msg))
7587 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7588 self.feedback_fn("* memory transfer complete")
7589 break
7591 if (utils.TimeoutExpired(last_feedback,
7592 self._MIGRATION_FEEDBACK_INTERVAL) and
7593 ms.transferred_ram is not None):
7594 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7595 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7596 last_feedback = time.time()
7598 time.sleep(self._MIGRATION_POLL_INTERVAL)
7600 result = self.rpc.call_instance_finalize_migration_src(source_node,
7601 instance,
7602 True,
7603 self.live)
7604 msg = result.fail_msg
7605 if msg:
7606 logging.error("Instance migration succeeded, but finalization failed"
7607 " on the source node: %s", msg)
7608 raise errors.OpExecError("Could not finalize instance migration: %s" %
7609 msg)
7611 instance.primary_node = target_node
7613 # distribute new instance config to the other nodes
7614 self.cfg.Update(instance, self.feedback_fn)
7616 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7617 instance,
7618 migration_info,
7619 True)
7620 msg = result.fail_msg
7621 if msg:
7622 logging.error("Instance migration succeeded, but finalization failed"
7623 " on the target node: %s", msg)
7624 raise errors.OpExecError("Could not finalize instance migration: %s" %
7625 msg)
7627 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7628 self._EnsureSecondary(source_node)
7629 self._WaitUntilSync()
7630 self._GoStandalone()
7631 self._GoReconnect(False)
7632 self._WaitUntilSync()
7634 self.feedback_fn("* done")
7636 def _ExecFailover(self):
7637 """Failover an instance.
7639 The failover is done by shutting it down on its present node and
7640 starting it on the secondary.
7643 instance = self.instance
7644 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7646 source_node = instance.primary_node
7647 target_node = self.target_node
7649 if instance.admin_up:
7650 self.feedback_fn("* checking disk consistency between source and target")
7651 for dev in instance.disks:
7652 # for drbd, these are drbd over lvm
7653 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7654 if primary_node.offline:
7655 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7656 " target node %s" %
7657 (primary_node.name, dev.iv_name, target_node))
7658 elif not self.ignore_consistency:
7659 raise errors.OpExecError("Disk %s is degraded on target node,"
7660 " aborting failover" % dev.iv_name)
7661 else:
7662 self.feedback_fn("* not checking disk consistency as instance is not"
7663 " running")
7665 self.feedback_fn("* shutting down instance on source node")
7666 logging.info("Shutting down instance %s on node %s",
7667 instance.name, source_node)
7669 result = self.rpc.call_instance_shutdown(source_node, instance,
7670 self.shutdown_timeout)
7671 msg = result.fail_msg
7672 if msg:
7673 if self.ignore_consistency or primary_node.offline:
7674 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7675 " proceeding anyway; please make sure node"
7676 " %s is down; error details: %s",
7677 instance.name, source_node, source_node, msg)
7678 else:
7679 raise errors.OpExecError("Could not shutdown instance %s on"
7680 " node %s: %s" %
7681 (instance.name, source_node, msg))
7683 self.feedback_fn("* deactivating the instance's disks on source node")
7684 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7685 raise errors.OpExecError("Can't shut down the instance's disks")
7687 instance.primary_node = target_node
7688 # distribute new instance config to the other nodes
7689 self.cfg.Update(instance, self.feedback_fn)
7691 # Only start the instance if it's marked as up
7692 if instance.admin_up:
7693 self.feedback_fn("* activating the instance's disks on target node %s" %
7694 target_node)
7695 logging.info("Starting instance %s on node %s",
7696 instance.name, target_node)
7698 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7699 ignore_secondaries=True)
7700 if not disks_ok:
7701 _ShutdownInstanceDisks(self.lu, instance)
7702 raise errors.OpExecError("Can't activate the instance's disks")
7704 self.feedback_fn("* starting the instance on the target node %s" %
7705 target_node)
7706 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7707 False)
7708 msg = result.fail_msg
7709 if msg:
7710 _ShutdownInstanceDisks(self.lu, instance)
7711 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7712 (instance.name, target_node, msg))
7714 def Exec(self, feedback_fn):
7715 """Perform the migration.
7718 self.feedback_fn = feedback_fn
7719 self.source_node = self.instance.primary_node
7721 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7722 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7723 self.target_node = self.instance.secondary_nodes[0]
7724 # Otherwise self.target_node has been populated either
7725 # directly, or through an iallocator.
7727 self.all_nodes = [self.source_node, self.target_node]
7728 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7729 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7731 if self.failover:
7732 feedback_fn("Failover instance %s" % self.instance.name)
7733 self._ExecFailover()
7734 else:
7735 feedback_fn("Migrating instance %s" % self.instance.name)
7737 if self.cleanup:
7738 return self._ExecCleanup()
7739 else:
7740 return self._ExecMigration()
7743 def _CreateBlockDev(lu, node, instance, device, force_create,
7744 info, force_open):
7745 """Create a tree of block devices on a given node.
7747 If this device type has to be created on secondaries, create it and
7750 If not, just recurse to children keeping the same 'force' value.
7752 @param lu: the lu on whose behalf we execute
7753 @param node: the node on which to create the device
7754 @type instance: L{objects.Instance}
7755 @param instance: the instance which owns the device
7756 @type device: L{objects.Disk}
7757 @param device: the device to create
7758 @type force_create: boolean
7759 @param force_create: whether to force creation of this device; this
7760 will be changed to True whenever we find a device which has
7761 CreateOnSecondary() attribute
7762 @param info: the extra 'metadata' we should attach to the device
7763 (this will be represented as a LVM tag)
7764 @type force_open: boolean
7765 @param force_open: this parameter will be passed to the
7766 L{backend.BlockdevCreate} function where it specifies
7767 whether we run on primary or not, and it affects both
7768 the child assembly and the device's own Open() execution
7771 if device.CreateOnSecondary():
7772 force_create = True
7774 if device.children:
7775 for child in device.children:
7776 _CreateBlockDev(lu, node, instance, child, force_create,
7777 info, force_open)
7779 if not force_create:
7780 return
7782 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7785 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7786 """Create a single block device on a given node.
7788 This will not recurse over children of the device, so they must be
7789 created in advance.
7791 @param lu: the lu on whose behalf we execute
7792 @param node: the node on which to create the device
7793 @type instance: L{objects.Instance}
7794 @param instance: the instance which owns the device
7795 @type device: L{objects.Disk}
7796 @param device: the device to create
7797 @param info: the extra 'metadata' we should attach to the device
7798 (this will be represented as a LVM tag)
7799 @type force_open: boolean
7800 @param force_open: this parameter will be passed to the
7801 L{backend.BlockdevCreate} function where it specifies
7802 whether we run on primary or not, and it affects both
7803 the child assembly and the device's own Open() execution
7806 lu.cfg.SetDiskID(device, node)
7807 result = lu.rpc.call_blockdev_create(node, device, device.size,
7808 instance.name, force_open, info)
7809 result.Raise("Can't create block device %s on"
7810 " node %s for instance %s" % (device, node, instance.name))
7811 if device.physical_id is None:
7812 device.physical_id = result.payload
7815 def _GenerateUniqueNames(lu, exts):
7816 """Generate a suitable LV name.
7818 This will generate a logical volume name for the given instance.
7821 results = []
7822 for val in exts:
7823 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7824 results.append("%s%s" % (new_id, val))
7826 return results
7828 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7829 iv_name, p_minor, s_minor):
7830 """Generate a drbd8 device complete with its children.
7833 assert len(vgnames) == len(names) == 2
7834 port = lu.cfg.AllocatePort()
7835 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7836 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7837 logical_id=(vgnames[0], names[0]))
7838 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7839 logical_id=(vgnames[1], names[1]))
7840 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7841 logical_id=(primary, secondary, port,
7842 p_minor, s_minor,
7843 shared_secret),
7844 children=[dev_data, dev_meta],
7845 iv_name=iv_name)
7846 return drbd_dev
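# Sketch of the resulting device tree (illustrative, not part of the original
# source): the returned drbd_dev is an LD_DRBD8 disk of the requested size
# whose children are the data LV (vgnames[0]/names[0], size MiB) and the
# metadata LV (vgnames[1]/names[1], DRBD_META_SIZE MiB); its logical_id
# carries both node names, the allocated port, the two minors and the
# generated shared secret.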
7849 def _GenerateDiskTemplate(lu, template_name,
7850 instance_name, primary_node,
7851 secondary_nodes, disk_info,
7852 file_storage_dir, file_driver,
7853 base_index, feedback_fn):
7854 """Generate the entire disk layout for a given template type.
7857 #TODO: compute space requirements
7859 vgname = lu.cfg.GetVGName()
7860 disk_count = len(disk_info)
7861 disks = []
7862 if template_name == constants.DT_DISKLESS:
7863 pass
7864 elif template_name == constants.DT_PLAIN:
7865 if len(secondary_nodes) != 0:
7866 raise errors.ProgrammerError("Wrong template configuration")
7868 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7869 for i in range(disk_count)])
7870 for idx, disk in enumerate(disk_info):
7871 disk_index = idx + base_index
7872 vg = disk.get(constants.IDISK_VG, vgname)
7873 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7874 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7875 size=disk[constants.IDISK_SIZE],
7876 logical_id=(vg, names[idx]),
7877 iv_name="disk/%d" % disk_index,
7878 mode=disk[constants.IDISK_MODE])
7879 disks.append(disk_dev)
7880 elif template_name == constants.DT_DRBD8:
7881 if len(secondary_nodes) != 1:
7882 raise errors.ProgrammerError("Wrong template configuration")
7883 remote_node = secondary_nodes[0]
7884 minors = lu.cfg.AllocateDRBDMinor(
7885 [primary_node, remote_node] * len(disk_info), instance_name)
7887 names = []
7888 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7889 for i in range(disk_count)]):
7890 names.append(lv_prefix + "_data")
7891 names.append(lv_prefix + "_meta")
7892 for idx, disk in enumerate(disk_info):
7893 disk_index = idx + base_index
7894 data_vg = disk.get(constants.IDISK_VG, vgname)
7895 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7896 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7897 disk[constants.IDISK_SIZE],
7898 [data_vg, meta_vg],
7899 names[idx * 2:idx * 2 + 2],
7900 "disk/%d" % disk_index,
7901 minors[idx * 2], minors[idx * 2 + 1])
7902 disk_dev.mode = disk[constants.IDISK_MODE]
7903 disks.append(disk_dev)
7904 elif template_name == constants.DT_FILE:
7905 if len(secondary_nodes) != 0:
7906 raise errors.ProgrammerError("Wrong template configuration")
7908 opcodes.RequireFileStorage()
7910 for idx, disk in enumerate(disk_info):
7911 disk_index = idx + base_index
7912 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7913 size=disk[constants.IDISK_SIZE],
7914 iv_name="disk/%d" % disk_index,
7915 logical_id=(file_driver,
7916 "%s/disk%d" % (file_storage_dir,
7917 disk_index)),
7918 mode=disk[constants.IDISK_MODE])
7919 disks.append(disk_dev)
7920 elif template_name == constants.DT_SHARED_FILE:
7921 if len(secondary_nodes) != 0:
7922 raise errors.ProgrammerError("Wrong template configuration")
7924 opcodes.RequireSharedFileStorage()
7926 for idx, disk in enumerate(disk_info):
7927 disk_index = idx + base_index
7928 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7929 size=disk[constants.IDISK_SIZE],
7930 iv_name="disk/%d" % disk_index,
7931 logical_id=(file_driver,
7932 "%s/disk%d" % (file_storage_dir,
7933 disk_index)),
7934 mode=disk[constants.IDISK_MODE])
7935 disks.append(disk_dev)
7936 elif template_name == constants.DT_BLOCK:
7937 if len(secondary_nodes) != 0:
7938 raise errors.ProgrammerError("Wrong template configuration")
7940 for idx, disk in enumerate(disk_info):
7941 disk_index = idx + base_index
7942 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7943 size=disk[constants.IDISK_SIZE],
7944 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7945 disk[constants.IDISK_ADOPT]),
7946 iv_name="disk/%d" % disk_index,
7947 mode=disk[constants.IDISK_MODE])
7948 disks.append(disk_dev)
7950 else:
7951 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7953 return disks
7955 def _GetInstanceInfoText(instance):
7956 """Compute that text that should be added to the disk's metadata.
7959 return "originstname+%s" % instance.name
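# Example (illustrative): for an instance named "instance1.example.com"
# (hypothetical name) the text attached to its disks would be
# "originstname+instance1.example.com".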
7962 def _CalcEta(time_taken, written, total_size):
7963 """Calculates the ETA based on size written and total size.
7965 @param time_taken: The time taken so far
7966 @param written: amount written so far
7967 @param total_size: The total size of data to be written
7968 @return: The remaining time in seconds
7971 avg_time = time_taken / float(written)
7972 return (total_size - written) * avg_time
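# Worked example (illustrative): if 1024 MiB of a 4096 MiB disk were written
# in 30 seconds, avg_time is 30 / 1024.0 seconds per MiB and _CalcEta
# returns (4096 - 1024) * (30 / 1024.0) = 90.0 seconds remaining.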
7975 def _WipeDisks(lu, instance):
7976 """Wipes instance disks.
7978 @type lu: L{LogicalUnit}
7979 @param lu: the logical unit on whose behalf we execute
7980 @type instance: L{objects.Instance}
7981 @param instance: the instance whose disks we should create
7982 @return: the success of the wipe
7985 node = instance.primary_node
7987 for device in instance.disks:
7988 lu.cfg.SetDiskID(device, node)
7990 logging.info("Pause sync of instance %s disks", instance.name)
7991 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7993 for idx, success in enumerate(result.payload):
7994 if not success:
7995 logging.warn("pause-sync of instance %s for disks %d failed",
7996 instance.name, idx)
7998 try:
7999 for idx, device in enumerate(instance.disks):
8000 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8001 # MAX_WIPE_CHUNK at max
8002 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8003 constants.MIN_WIPE_CHUNK_PERCENT)
8004 # we _must_ make this an int, otherwise rounding errors will
8005 # occur
8006 wipe_chunk_size = int(wipe_chunk_size)
8008 lu.LogInfo("* Wiping disk %d", idx)
8009 logging.info("Wiping disk %d for instance %s, node %s using"
8010 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8012 offset = 0
8013 size = device.size
8014 last_output = 0
8015 start_time = time.time()
8017 while offset < size:
8018 wipe_size = min(wipe_chunk_size, size - offset)
8019 logging.debug("Wiping disk %d, offset %s, chunk %s",
8020 idx, offset, wipe_size)
8021 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8022 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8023 (idx, offset, wipe_size))
8024 now = time.time()
8025 offset += wipe_size
8026 if now - last_output >= 60:
8027 eta = _CalcEta(now - start_time, offset, size)
8028 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8029 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8030 last_output = now
8031 finally:
8032 logging.info("Resume sync of instance %s disks", instance.name)
8034 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8036 for idx, success in enumerate(result.payload):
8037 if not success:
8038 lu.LogWarning("Resume sync of disk %d failed, please have a"
8039 " look at the status and troubleshoot the issue", idx)
8040 logging.warn("resume-sync of instance %s for disks %d failed",
8041 instance.name, idx)
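# Chunk-size example (illustrative; assumes constants.MAX_WIPE_CHUNK is
# 1024 MiB and constants.MIN_WIPE_CHUNK_PERCENT is 10, values defined in
# constants.py and not shown here): a 4096 MiB disk is wiped in chunks of
# int(min(1024, 4096 / 100.0 * 10)) = 409 MiB, while a 20480 MiB disk is
# capped at 1024 MiB per chunk.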
8044 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8045 """Create all disks for an instance.
8047 This abstracts away some work from AddInstance.
8049 @type lu: L{LogicalUnit}
8050 @param lu: the logical unit on whose behalf we execute
8051 @type instance: L{objects.Instance}
8052 @param instance: the instance whose disks we should create
8054 @param to_skip: list of indices to skip
8055 @type target_node: string
8056 @param target_node: if passed, overrides the target node for creation
8058 @return: the success of the creation
8061 info = _GetInstanceInfoText(instance)
8062 if target_node is None:
8063 pnode = instance.primary_node
8064 all_nodes = instance.all_nodes
8065 else:
8066 pnode = target_node
8067 all_nodes = [pnode]
8069 if instance.disk_template in constants.DTS_FILEBASED:
8070 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8071 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8073 result.Raise("Failed to create directory '%s' on"
8074 " node %s" % (file_storage_dir, pnode))
8076 # Note: this needs to be kept in sync with adding of disks in
8077 # LUInstanceSetParams
8078 for idx, device in enumerate(instance.disks):
8079 if to_skip and idx in to_skip:
8080 continue
8081 logging.info("Creating volume %s for instance %s",
8082 device.iv_name, instance.name)
8084 for node in all_nodes:
8085 f_create = node == pnode
8086 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8089 def _RemoveDisks(lu, instance, target_node=None):
8090 """Remove all disks for an instance.
8092 This abstracts away some work from `AddInstance()` and
8093 `RemoveInstance()`. Note that in case some of the devices couldn't
8094 be removed, the removal will continue with the other ones (compare
8095 with `_CreateDisks()`).
8097 @type lu: L{LogicalUnit}
8098 @param lu: the logical unit on whose behalf we execute
8099 @type instance: L{objects.Instance}
8100 @param instance: the instance whose disks we should remove
8101 @type target_node: string
8102 @param target_node: used to override the node on which to remove the disks
8104 @return: the success of the removal
8107 logging.info("Removing block devices for instance %s", instance.name)
8109 all_result = True
8110 for device in instance.disks:
8111 if target_node:
8112 edata = [(target_node, device)]
8113 else:
8114 edata = device.ComputeNodeTree(instance.primary_node)
8115 for node, disk in edata:
8116 lu.cfg.SetDiskID(disk, node)
8117 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8118 if msg:
8119 lu.LogWarning("Could not remove block device %s on node %s,"
8120 " continuing anyway: %s", device.iv_name, node, msg)
8121 all_result = False
8123 if instance.disk_template == constants.DT_FILE:
8124 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8125 if target_node:
8126 tgt = target_node
8127 else:
8128 tgt = instance.primary_node
8129 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8130 if result.fail_msg:
8131 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8132 file_storage_dir, instance.primary_node, result.fail_msg)
8133 all_result = False
8135 return all_result
8138 def _ComputeDiskSizePerVG(disk_template, disks):
8139 """Compute disk size requirements in the volume group
8142 def _compute(disks, payload):
8143 """Universal algorithm.
8146 vgs = {}
8147 for disk in disks:
8148 vgs[disk[constants.IDISK_VG]] = \
8149 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8151 return vgs
8153 # Required free disk space as a function of disk and swap space
8154 req_size_dict = {
8155 constants.DT_DISKLESS: {},
8156 constants.DT_PLAIN: _compute(disks, 0),
8157 # 128 MB are added for drbd metadata for each disk
8158 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8159 constants.DT_FILE: {},
8160 constants.DT_SHARED_FILE: {},
8161 }
8163 if disk_template not in req_size_dict:
8164 raise errors.ProgrammerError("Disk template '%s' size requirement"
8165 " is unknown" % disk_template)
8167 return req_size_dict[disk_template]
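# Example (illustrative): for two DRBD8 disks, 1024 MiB in VG "xenvg" and
# 512 MiB in VG "fastvg" (hypothetical VG names), this returns
# {"xenvg": 1024 + DRBD_META_SIZE, "fastvg": 512 + DRBD_META_SIZE}, i.e.
# every disk pays for its DRBD metadata in its own volume group.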
8170 def _ComputeDiskSize(disk_template, disks):
8171 """Compute disk size requirements in the volume group
8174 # Required free disk space as a function of disk and swap space
8175 req_size_dict = {
8176 constants.DT_DISKLESS: None,
8177 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8178 # 128 MB are added for drbd metadata for each disk
8179 constants.DT_DRBD8:
8180 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8181 constants.DT_FILE: None,
8182 constants.DT_SHARED_FILE: 0,
8183 constants.DT_BLOCK: 0,
8184 }
8186 if disk_template not in req_size_dict:
8187 raise errors.ProgrammerError("Disk template '%s' size requirement"
8188 " is unknown" % disk_template)
8190 return req_size_dict[disk_template]
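# Example (illustrative): the same two DRBD8 disks of 1024 MiB and 512 MiB
# yield a flat requirement of (1024 + DRBD_META_SIZE) +
# (512 + DRBD_META_SIZE) MiB here, whereas DT_PLAIN would only need
# 1024 + 512 MiB.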
8193 def _FilterVmNodes(lu, nodenames):
8194 """Filters out non-vm_capable nodes from a list.
8196 @type lu: L{LogicalUnit}
8197 @param lu: the logical unit for which we check
8198 @type nodenames: list
8199 @param nodenames: the list of nodes on which we should check
8201 @return: the list of vm-capable nodes
8204 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8205 return [name for name in nodenames if name not in vm_nodes]
8208 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8209 """Hypervisor parameter validation.
8211 This function abstract the hypervisor parameter validation to be
8212 used in both instance create and instance modify.
8214 @type lu: L{LogicalUnit}
8215 @param lu: the logical unit for which we check
8216 @type nodenames: list
8217 @param nodenames: the list of nodes on which we should check
8218 @type hvname: string
8219 @param hvname: the name of the hypervisor we should use
8220 @type hvparams: dict
8221 @param hvparams: the parameters which we need to check
8222 @raise errors.OpPrereqError: if the parameters are not valid
8225 nodenames = _FilterVmNodes(lu, nodenames)
8227 cluster = lu.cfg.GetClusterInfo()
8228 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8230 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8231 for node in nodenames:
8232 info = hvinfo[node]
8233 if info.offline:
8234 continue
8235 info.Raise("Hypervisor parameter validation failed on node %s" % node)
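# Typical usage, as in LUInstanceCreate.CheckPrereq further down:
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)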
8238 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8239 """OS parameters validation.
8241 @type lu: L{LogicalUnit}
8242 @param lu: the logical unit for which we check
8243 @type required: boolean
8244 @param required: whether the validation should fail if the OS is not
8245 found
8246 @type nodenames: list
8247 @param nodenames: the list of nodes on which we should check
8248 @type osname: string
8249 @param osname: the name of the OS we should use
8250 @type osparams: dict
8251 @param osparams: the parameters which we need to check
8252 @raise errors.OpPrereqError: if the parameters are not valid
8255 nodenames = _FilterVmNodes(lu, nodenames)
8256 result = lu.rpc.call_os_validate(nodenames, required, osname,
8257 [constants.OS_VALIDATE_PARAMETERS],
8258 osparams)
8259 for node, nres in result.items():
8260 # we don't check for offline cases since this should be run only
8261 # against the master node and/or an instance's nodes
8262 nres.Raise("OS Parameters validation failed on node %s" % node)
8263 if not nres.payload:
8264 lu.LogInfo("OS %s not found on node %s, validation skipped",
8265 osname, node)
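# Typical usage, as in LUInstanceCreate.CheckPrereq further down:
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)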
8268 class LUInstanceCreate(LogicalUnit):
8269 """Create an instance.
8272 HPATH = "instance-add"
8273 HTYPE = constants.HTYPE_INSTANCE
8276 def CheckArguments(self):
8280 # do not require name_check to ease forward/backward compatibility
8282 if self.op.no_install and self.op.start:
8283 self.LogInfo("No-installation mode selected, disabling startup")
8284 self.op.start = False
8285 # validate/normalize the instance name
8286 self.op.instance_name = \
8287 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8289 if self.op.ip_check and not self.op.name_check:
8290 # TODO: make the ip check more flexible and not depend on the name check
8291 raise errors.OpPrereqError("Cannot do IP address check without a name"
8292 " check", errors.ECODE_INVAL)
8294 # check nics' parameter names
8295 for nic in self.op.nics:
8296 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8298 # check disks. parameter names and consistent adopt/no-adopt strategy
8299 has_adopt = has_no_adopt = False
8300 for disk in self.op.disks:
8301 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8302 if constants.IDISK_ADOPT in disk:
8303 has_adopt = True
8304 else:
8305 has_no_adopt = True
8306 if has_adopt and has_no_adopt:
8307 raise errors.OpPrereqError("Either all disks are adopted or none is",
8308 errors.ECODE_INVAL)
8309 if has_adopt:
8310 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8311 raise errors.OpPrereqError("Disk adoption is not supported for the"
8312 " '%s' disk template" %
8313 self.op.disk_template,
8315 if self.op.iallocator is not None:
8316 raise errors.OpPrereqError("Disk adoption not allowed with an"
8317 " iallocator script", errors.ECODE_INVAL)
8318 if self.op.mode == constants.INSTANCE_IMPORT:
8319 raise errors.OpPrereqError("Disk adoption not allowed for"
8320 " instance import", errors.ECODE_INVAL)
8322 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8323 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8324 " but no 'adopt' parameter given" %
8325 self.op.disk_template,
8328 self.adopt_disks = has_adopt
8330 # instance name verification
8331 if self.op.name_check:
8332 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8333 self.op.instance_name = self.hostname1.name
8334 # used in CheckPrereq for ip ping check
8335 self.check_ip = self.hostname1.ip
8336 else:
8337 self.check_ip = None
8339 # file storage checks
8340 if (self.op.file_driver and
8341 not self.op.file_driver in constants.FILE_DRIVER):
8342 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8343 self.op.file_driver, errors.ECODE_INVAL)
8345 if self.op.disk_template == constants.DT_FILE:
8346 opcodes.RequireFileStorage()
8347 elif self.op.disk_template == constants.DT_SHARED_FILE:
8348 opcodes.RequireSharedFileStorage()
8350 ### Node/iallocator related checks
8351 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8353 if self.op.pnode is not None:
8354 if self.op.disk_template in constants.DTS_INT_MIRROR:
8355 if self.op.snode is None:
8356 raise errors.OpPrereqError("The networked disk templates need"
8357 " a mirror node", errors.ECODE_INVAL)
8358 elif self.op.snode:
8359 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8360 " template")
8361 self.op.snode = None
8363 self._cds = _GetClusterDomainSecret()
8365 if self.op.mode == constants.INSTANCE_IMPORT:
8366 # On import force_variant must be True, because if we forced it at
8367 # initial install, our only chance when importing it back is that it
8369 self.op.force_variant = True
8371 if self.op.no_install:
8372 self.LogInfo("No-installation mode has no effect during import")
8374 elif self.op.mode == constants.INSTANCE_CREATE:
8375 if self.op.os_type is None:
8376 raise errors.OpPrereqError("No guest OS specified",
8378 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8379 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8380 " installation" % self.op.os_type,
8382 if self.op.disk_template is None:
8383 raise errors.OpPrereqError("No disk template specified",
8386 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8387 # Check handshake to ensure both clusters have the same domain secret
8388 src_handshake = self.op.source_handshake
8389 if not src_handshake:
8390 raise errors.OpPrereqError("Missing source handshake",
8391 errors.ECODE_INVAL)
8393 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8394 src_handshake)
8395 if errmsg:
8396 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8397 errors.ECODE_INVAL)
8399 # Load and check source CA
8400 self.source_x509_ca_pem = self.op.source_x509_ca
8401 if not self.source_x509_ca_pem:
8402 raise errors.OpPrereqError("Missing source X509 CA",
8403 errors.ECODE_INVAL)
8405 try:
8406 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8407 self._cds)
8408 except OpenSSL.crypto.Error, err:
8409 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8410 (err, ), errors.ECODE_INVAL)
8412 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8413 if errcode is not None:
8414 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8417 self.source_x509_ca = cert
8419 src_instance_name = self.op.source_instance_name
8420 if not src_instance_name:
8421 raise errors.OpPrereqError("Missing source instance name",
8424 self.source_instance_name = \
8425 netutils.GetHostname(name=src_instance_name).name
8427 else:
8428 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8429 self.op.mode, errors.ECODE_INVAL)
8431 def ExpandNames(self):
8432 """ExpandNames for CreateInstance.
8434 Figure out the right locks for instance creation.
8437 self.needed_locks = {}
8439 instance_name = self.op.instance_name
8440 # this is just a preventive check, but someone might still add this
8441 # instance in the meantime, and creation will fail at lock-add time
8442 if instance_name in self.cfg.GetInstanceList():
8443 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8444 instance_name, errors.ECODE_EXISTS)
8446 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8448 if self.op.iallocator:
8449 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8451 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8452 nodelist = [self.op.pnode]
8453 if self.op.snode is not None:
8454 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8455 nodelist.append(self.op.snode)
8456 self.needed_locks[locking.LEVEL_NODE] = nodelist
8458 # in case of import lock the source node too
8459 if self.op.mode == constants.INSTANCE_IMPORT:
8460 src_node = self.op.src_node
8461 src_path = self.op.src_path
8463 if src_path is None:
8464 self.op.src_path = src_path = self.op.instance_name
8466 if src_node is None:
8467 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8468 self.op.src_node = None
8469 if os.path.isabs(src_path):
8470 raise errors.OpPrereqError("Importing an instance from a path"
8471 " requires a source node option",
8474 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8475 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8476 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8477 if not os.path.isabs(src_path):
8478 self.op.src_path = src_path = \
8479 utils.PathJoin(constants.EXPORT_DIR, src_path)
8481 def _RunAllocator(self):
8482 """Run the allocator based on input opcode.
8485 nics = [n.ToDict() for n in self.nics]
8486 ial = IAllocator(self.cfg, self.rpc,
8487 mode=constants.IALLOCATOR_MODE_ALLOC,
8488 name=self.op.instance_name,
8489 disk_template=self.op.disk_template,
8490 tags=self.op.tags,
8491 os=self.op.os_type,
8492 vcpus=self.be_full[constants.BE_VCPUS],
8493 memory=self.be_full[constants.BE_MEMORY],
8494 disks=self.disks,
8495 nics=nics,
8496 hypervisor=self.op.hypervisor,
8497 )
8499 ial.Run(self.op.iallocator)
8501 if not ial.success:
8502 raise errors.OpPrereqError("Can't compute nodes using"
8503 " iallocator '%s': %s" %
8504 (self.op.iallocator, ial.info),
8506 if len(ial.result) != ial.required_nodes:
8507 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8508 " of nodes (%s), required %s" %
8509 (self.op.iallocator, len(ial.result),
8510 ial.required_nodes), errors.ECODE_FAULT)
8511 self.op.pnode = ial.result[0]
8512 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8513 self.op.instance_name, self.op.iallocator,
8514 utils.CommaJoin(ial.result))
8515 if ial.required_nodes == 2:
8516 self.op.snode = ial.result[1]
8518 def BuildHooksEnv(self):
8521 This runs on master, primary and secondary nodes of the instance.
8525 "ADD_MODE": self.op.mode,
8527 if self.op.mode == constants.INSTANCE_IMPORT:
8528 env["SRC_NODE"] = self.op.src_node
8529 env["SRC_PATH"] = self.op.src_path
8530 env["SRC_IMAGES"] = self.src_images
8532 env.update(_BuildInstanceHookEnv(
8533 name=self.op.instance_name,
8534 primary_node=self.op.pnode,
8535 secondary_nodes=self.secondaries,
8536 status=self.op.start,
8537 os_type=self.op.os_type,
8538 memory=self.be_full[constants.BE_MEMORY],
8539 vcpus=self.be_full[constants.BE_VCPUS],
8540 nics=_NICListToTuple(self, self.nics),
8541 disk_template=self.op.disk_template,
8542 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8543 for d in self.disks],
8546 hypervisor_name=self.op.hypervisor,
8548 ))
8550 return env
8552 def BuildHooksNodes(self):
8553 """Build hooks nodes.
8556 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8557 return nl, nl
8559 def _ReadExportInfo(self):
8560 """Reads the export information from disk.
8562 It will override the opcode source node and path with the actual
8563 information, if these two were not specified before.
8565 @return: the export information
8568 assert self.op.mode == constants.INSTANCE_IMPORT
8570 src_node = self.op.src_node
8571 src_path = self.op.src_path
8573 if src_node is None:
8574 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8575 exp_list = self.rpc.call_export_list(locked_nodes)
8576 found = False
8577 for node in exp_list:
8578 if exp_list[node].fail_msg:
8579 continue
8580 if src_path in exp_list[node].payload:
8581 found = True
8582 self.op.src_node = src_node = node
8583 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8584 src_path)
8585 break
8586 if not found:
8587 raise errors.OpPrereqError("No export found for relative path %s" %
8588 src_path, errors.ECODE_INVAL)
8590 _CheckNodeOnline(self, src_node)
8591 result = self.rpc.call_export_info(src_node, src_path)
8592 result.Raise("No export or invalid export found in dir %s" % src_path)
8594 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8595 if not export_info.has_section(constants.INISECT_EXP):
8596 raise errors.ProgrammerError("Corrupted export config",
8597 errors.ECODE_ENVIRON)
8599 ei_version = export_info.get(constants.INISECT_EXP, "version")
8600 if (int(ei_version) != constants.EXPORT_VERSION):
8601 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8602 (ei_version, constants.EXPORT_VERSION),
8603 errors.ECODE_ENVIRON)
8605 return export_info
8606 def _ReadExportParams(self, einfo):
8607 """Use export parameters as defaults.
8609 In case the opcode doesn't specify (as in override) some instance
8610 parameters, then try to use them from the export information, if
8614 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8616 if self.op.disk_template is None:
8617 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8618 self.op.disk_template = einfo.get(constants.INISECT_INS,
8619 "disk_template")
8620 if self.op.disk_template not in constants.DISK_TEMPLATES:
8621 raise errors.OpPrereqError("Disk template specified in configuration"
8622 " file is not one of the allowed values:"
8623 " %s" % " ".join(constants.DISK_TEMPLATES))
8624 else:
8625 raise errors.OpPrereqError("No disk template specified and the export"
8626 " is missing the disk_template information",
8629 if not self.op.disks:
8630 disks = []
8631 # TODO: import the disk iv_name too
8632 for idx in range(constants.MAX_DISKS):
8633 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8634 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8635 disks.append({constants.IDISK_SIZE: disk_sz})
8636 self.op.disks = disks
8637 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8638 raise errors.OpPrereqError("No disk info specified and the export"
8639 " is missing the disk information",
8642 if not self.op.nics:
8643 nics = []
8644 for idx in range(constants.MAX_NICS):
8645 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8646 ndict = {}
8647 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8648 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8649 ndict[name] = v
8650 nics.append(ndict)
8651 else:
8652 break
8653 self.op.nics = nics
8655 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8656 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8658 if (self.op.hypervisor is None and
8659 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8660 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8662 if einfo.has_section(constants.INISECT_HYP):
8663 # use the export parameters but do not override the ones
8664 # specified by the user
8665 for name, value in einfo.items(constants.INISECT_HYP):
8666 if name not in self.op.hvparams:
8667 self.op.hvparams[name] = value
8669 if einfo.has_section(constants.INISECT_BEP):
8670 # use the parameters, without overriding
8671 for name, value in einfo.items(constants.INISECT_BEP):
8672 if name not in self.op.beparams:
8673 self.op.beparams[name] = value
8675 # try to read the parameters old style, from the main section
8676 for name in constants.BES_PARAMETERS:
8677 if (name not in self.op.beparams and
8678 einfo.has_option(constants.INISECT_INS, name)):
8679 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8681 if einfo.has_section(constants.INISECT_OSP):
8682 # use the parameters, without overriding
8683 for name, value in einfo.items(constants.INISECT_OSP):
8684 if name not in self.op.osparams:
8685 self.op.osparams[name] = value
8687 def _RevertToDefaults(self, cluster):
8688 """Revert the instance parameters to the default values.
8692 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8693 for name in self.op.hvparams.keys():
8694 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8695 del self.op.hvparams[name]
8697 be_defs = cluster.SimpleFillBE({})
8698 for name in self.op.beparams.keys():
8699 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8700 del self.op.beparams[name]
8702 nic_defs = cluster.SimpleFillNIC({})
8703 for nic in self.op.nics:
8704 for name in constants.NICS_PARAMETERS:
8705 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8706 del nic[name]
8708 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8709 for name in self.op.osparams.keys():
8710 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8711 del self.op.osparams[name]
8713 def _CalculateFileStorageDir(self):
8714 """Calculate final instance file storage dir.
8717 # file storage dir calculation/check
8718 self.instance_file_storage_dir = None
8719 if self.op.disk_template in constants.DTS_FILEBASED:
8720 # build the full file storage dir path
8721 joinargs = []
8723 if self.op.disk_template == constants.DT_SHARED_FILE:
8724 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8725 else:
8726 get_fsd_fn = self.cfg.GetFileStorageDir
8728 cfg_storagedir = get_fsd_fn()
8729 if not cfg_storagedir:
8730 raise errors.OpPrereqError("Cluster file storage dir not defined")
8731 joinargs.append(cfg_storagedir)
8733 if self.op.file_storage_dir is not None:
8734 joinargs.append(self.op.file_storage_dir)
8736 joinargs.append(self.op.instance_name)
8738 # pylint: disable=W0142
8739 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8741 def CheckPrereq(self):
8742 """Check prerequisites.
8745 self._CalculateFileStorageDir()
8747 if self.op.mode == constants.INSTANCE_IMPORT:
8748 export_info = self._ReadExportInfo()
8749 self._ReadExportParams(export_info)
8751 if (not self.cfg.GetVGName() and
8752 self.op.disk_template not in constants.DTS_NOT_LVM):
8753 raise errors.OpPrereqError("Cluster does not support lvm-based"
8754 " instances", errors.ECODE_STATE)
8756 if (self.op.hypervisor is None or
8757 self.op.hypervisor == constants.VALUE_AUTO):
8758 self.op.hypervisor = self.cfg.GetHypervisorType()
8760 cluster = self.cfg.GetClusterInfo()
8761 enabled_hvs = cluster.enabled_hypervisors
8762 if self.op.hypervisor not in enabled_hvs:
8763 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8764 " cluster (%s)" % (self.op.hypervisor,
8765 ",".join(enabled_hvs)),
8768 # Check tag validity
8769 for tag in self.op.tags:
8770 objects.TaggableObject.ValidateTag(tag)
8772 # check hypervisor parameter syntax (locally)
8773 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8774 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8775 self.op.hvparams)
8776 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8777 hv_type.CheckParameterSyntax(filled_hvp)
8778 self.hv_full = filled_hvp
8779 # check that we don't specify global parameters on an instance
8780 _CheckGlobalHvParams(self.op.hvparams)
8782 # fill and remember the beparams dict
8783 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8784 for param, value in self.op.beparams.iteritems():
8785 if value == constants.VALUE_AUTO:
8786 self.op.beparams[param] = default_beparams[param]
8787 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8788 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8790 # build os parameters
8791 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8793 # now that hvp/bep are in final format, let's reset to defaults,
8794 # if told to do so
8795 if self.op.identify_defaults:
8796 self._RevertToDefaults(cluster)
8798 # NIC buildup
8799 self.nics = []
8800 for idx, nic in enumerate(self.op.nics):
8801 nic_mode_req = nic.get(constants.INIC_MODE, None)
8802 nic_mode = nic_mode_req
8803 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8804 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8806 # in routed mode, for the first nic, the default ip is 'auto'
8807 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8808 default_ip_mode = constants.VALUE_AUTO
8810 default_ip_mode = constants.VALUE_NONE
8812 # ip validity checks
8813 ip = nic.get(constants.INIC_IP, default_ip_mode)
8814 if ip is None or ip.lower() == constants.VALUE_NONE:
8815 nic_ip = None
8816 elif ip.lower() == constants.VALUE_AUTO:
8817 if not self.op.name_check:
8818 raise errors.OpPrereqError("IP address set to auto but name checks"
8819 " have been skipped",
8820 errors.ECODE_INVAL)
8821 nic_ip = self.hostname1.ip
8822 else:
8823 if not netutils.IPAddress.IsValid(ip):
8824 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8825 errors.ECODE_INVAL)
8826 nic_ip = ip
8828 # TODO: check the ip address for uniqueness
8829 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8830 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8833 # MAC address verification
8834 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8835 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8836 mac = utils.NormalizeAndValidateMac(mac)
8838 try:
8839 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8840 except errors.ReservationError:
8841 raise errors.OpPrereqError("MAC address %s already in use"
8842 " in cluster" % mac,
8843 errors.ECODE_NOTUNIQUE)
8845 # Build nic parameters
8846 link = nic.get(constants.INIC_LINK, None)
8847 if link == constants.VALUE_AUTO:
8848 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8850 nicparams = {}
8851 nicparams[constants.NIC_MODE] = nic_mode
8852 if link:
8853 nicparams[constants.NIC_LINK] = link
8855 check_params = cluster.SimpleFillNIC(nicparams)
8856 objects.NIC.CheckParameterSyntax(check_params)
8857 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8859 # disk checks/pre-build
8860 default_vg = self.cfg.GetVGName()
8861 self.disks = []
8862 for disk in self.op.disks:
8863 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8864 if mode not in constants.DISK_ACCESS_SET:
8865 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8866 mode, errors.ECODE_INVAL)
8867 size = disk.get(constants.IDISK_SIZE, None)
8868 if size is None:
8869 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8870 try:
8871 size = int(size)
8872 except (TypeError, ValueError):
8873 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8874 errors.ECODE_INVAL)
8876 data_vg = disk.get(constants.IDISK_VG, default_vg)
8877 new_disk = {
8878 constants.IDISK_SIZE: size,
8879 constants.IDISK_MODE: mode,
8880 constants.IDISK_VG: data_vg,
8881 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8882 }
8883 if constants.IDISK_ADOPT in disk:
8884 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8885 self.disks.append(new_disk)
8887 if self.op.mode == constants.INSTANCE_IMPORT:
8888 disk_images = []
8889 for idx in range(len(self.disks)):
8890 option = "disk%d_dump" % idx
8891 if export_info.has_option(constants.INISECT_INS, option):
8892 # FIXME: are the old os-es, disk sizes, etc. useful?
8893 export_name = export_info.get(constants.INISECT_INS, option)
8894 image = utils.PathJoin(self.op.src_path, export_name)
8895 disk_images.append(image)
8896 else:
8897 disk_images.append(False)
8899 self.src_images = disk_images
8901 old_name = export_info.get(constants.INISECT_INS, "name")
8902 if self.op.instance_name == old_name:
8903 for idx, nic in enumerate(self.nics):
8904 if nic.mac == constants.VALUE_AUTO:
8905 nic_mac_ini = "nic%d_mac" % idx
8906 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8908 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8910 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8911 if self.op.ip_check:
8912 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8913 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8914 (self.check_ip, self.op.instance_name),
8915 errors.ECODE_NOTUNIQUE)
8917 #### mac address generation
8918 # By generating here the mac address both the allocator and the hooks get
8919 # the real final mac address rather than the 'auto' or 'generate' value.
8920 # There is a race condition between the generation and the instance object
8921 # creation, which means that we know the mac is valid now, but we're not
8922 # sure it will be when we actually add the instance. If things go bad
8923 # adding the instance will abort because of a duplicate mac, and the
8924 # creation job will fail.
8925 for nic in self.nics:
8926 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8927 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8931 if self.op.iallocator is not None:
8932 self._RunAllocator()
8934 #### node related checks
8936 # check primary node
8937 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8938 assert self.pnode is not None, \
8939 "Cannot retrieve locked node %s" % self.op.pnode
8941 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8942 pnode.name, errors.ECODE_STATE)
8944 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8945 pnode.name, errors.ECODE_STATE)
8946 if not pnode.vm_capable:
8947 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8948 " '%s'" % pnode.name, errors.ECODE_STATE)
8950 self.secondaries = []
8952 # mirror node verification
8953 if self.op.disk_template in constants.DTS_INT_MIRROR:
8954 if self.op.snode == pnode.name:
8955 raise errors.OpPrereqError("The secondary node cannot be the"
8956 " primary node", errors.ECODE_INVAL)
8957 _CheckNodeOnline(self, self.op.snode)
8958 _CheckNodeNotDrained(self, self.op.snode)
8959 _CheckNodeVmCapable(self, self.op.snode)
8960 self.secondaries.append(self.op.snode)
8962 nodenames = [pnode.name] + self.secondaries
8964 if not self.adopt_disks:
8965 # Check lv size requirements, if not adopting
8966 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8967 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8969 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8970 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8971 disk[constants.IDISK_ADOPT])
8972 for disk in self.disks])
8973 if len(all_lvs) != len(self.disks):
8974 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8976 for lv_name in all_lvs:
8977 try:
8978 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8979 # to ReserveLV uses the same syntax
8980 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8981 except errors.ReservationError:
8982 raise errors.OpPrereqError("LV named %s used by another instance" %
8983 lv_name, errors.ECODE_NOTUNIQUE)
8985 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8986 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8988 node_lvs = self.rpc.call_lv_list([pnode.name],
8989 vg_names.payload.keys())[pnode.name]
8990 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8991 node_lvs = node_lvs.payload
8993 delta = all_lvs.difference(node_lvs.keys())
8994 if delta:
8995 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8996 utils.CommaJoin(delta),
8997 errors.ECODE_INVAL)
8998 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8999 if online_lvs:
9000 raise errors.OpPrereqError("Online logical volumes found, cannot"
9001 " adopt: %s" % utils.CommaJoin(online_lvs),
9002 errors.ECODE_STATE)
9003 # update the size of disk based on what is found
9004 for dsk in self.disks:
9005 dsk[constants.IDISK_SIZE] = \
9006 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9007 dsk[constants.IDISK_ADOPT])][0]))
9009 elif self.op.disk_template == constants.DT_BLOCK:
9010 # Normalize and de-duplicate device paths
9011 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9012 for disk in self.disks])
9013 if len(all_disks) != len(self.disks):
9014 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9016 baddisks = [d for d in all_disks
9017 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9018 if baddisks:
9019 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9020 " cannot be adopted" %
9021 (", ".join(baddisks),
9022 constants.ADOPTABLE_BLOCKDEV_ROOT),
9025 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9026 list(all_disks))[pnode.name]
9027 node_disks.Raise("Cannot get block device information from node %s" %
9028 pnode.name)
9029 node_disks = node_disks.payload
9030 delta = all_disks.difference(node_disks.keys())
9031 if delta:
9032 raise errors.OpPrereqError("Missing block device(s): %s" %
9033 utils.CommaJoin(delta),
9034 errors.ECODE_INVAL)
9035 for dsk in self.disks:
9036 dsk[constants.IDISK_SIZE] = \
9037 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9039 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9041 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9042 # check OS parameters (remotely)
9043 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9045 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9047 # memory check on primary node
9048 if self.op.start:
9049 _CheckNodeFreeMemory(self, self.pnode.name,
9050 "creating instance %s" % self.op.instance_name,
9051 self.be_full[constants.BE_MEMORY],
9052 self.op.hypervisor)
9054 self.dry_run_result = list(nodenames)
9056 def Exec(self, feedback_fn):
9057 """Create and add the instance to the cluster.
9060 instance = self.op.instance_name
9061 pnode_name = self.pnode.name
9063 ht_kind = self.op.hypervisor
9064 if ht_kind in constants.HTS_REQ_PORT:
9065 network_port = self.cfg.AllocatePort()
9066 else:
9067 network_port = None
9069 disks = _GenerateDiskTemplate(self,
9070 self.op.disk_template,
9071 instance, pnode_name,
9072 self.secondaries,
9073 self.disks,
9074 self.instance_file_storage_dir,
9075 self.op.file_driver,
9076 0,
9077 feedback_fn)
9079 iobj = objects.Instance(name=instance, os=self.op.os_type,
9080 primary_node=pnode_name,
9081 nics=self.nics, disks=disks,
9082 disk_template=self.op.disk_template,
9083 admin_up=False,
9084 network_port=network_port,
9085 beparams=self.op.beparams,
9086 hvparams=self.op.hvparams,
9087 hypervisor=self.op.hypervisor,
9088 osparams=self.op.osparams,
9089 )
9091 if self.op.tags:
9092 for tag in self.op.tags:
9093 iobj.AddTag(tag)
9095 if self.adopt_disks:
9096 if self.op.disk_template == constants.DT_PLAIN:
9097 # rename LVs to the newly-generated names; we need to construct
9098 # 'fake' LV disks with the old data, plus the new unique_id
9099 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9100 rename_to = []
9101 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9102 rename_to.append(t_dsk.logical_id)
9103 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9104 self.cfg.SetDiskID(t_dsk, pnode_name)
9105 result = self.rpc.call_blockdev_rename(pnode_name,
9106 zip(tmp_disks, rename_to))
9107 result.Raise("Failed to rename adopted LVs")
9108 else:
9109 feedback_fn("* creating instance disks...")
9110 try:
9111 _CreateDisks(self, iobj)
9112 except errors.OpExecError:
9113 self.LogWarning("Device creation failed, reverting...")
9114 try:
9115 _RemoveDisks(self, iobj)
9116 finally:
9117 self.cfg.ReleaseDRBDMinors(instance)
9118 raise
9120 feedback_fn("adding instance %s to cluster config" % instance)
9122 self.cfg.AddInstance(iobj, self.proc.GetECId())
9124 # Declare that we don't want to remove the instance lock anymore, as we've
9125 # added the instance to the config
9126 del self.remove_locks[locking.LEVEL_INSTANCE]
9128 if self.op.mode == constants.INSTANCE_IMPORT:
9129 # Release unused nodes
9130 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9131 else:
9132 # Release all nodes
9133 _ReleaseLocks(self, locking.LEVEL_NODE)
9135 disk_abort = False
9136 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9137 feedback_fn("* wiping instance disks...")
9138 try:
9139 _WipeDisks(self, iobj)
9140 except errors.OpExecError, err:
9141 logging.exception("Wiping disks failed")
9142 self.LogWarning("Wiping instance disks failed (%s)", err)
9143 disk_abort = True
9145 if disk_abort:
9146 # Something is already wrong with the disks, don't do anything else
9147 pass
9148 elif self.op.wait_for_sync:
9149 disk_abort = not _WaitForSync(self, iobj)
9150 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9151 # make sure the disks are not degraded (still sync-ing is ok)
9152 feedback_fn("* checking mirrors status")
9153 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9154 else:
9155 disk_abort = False
9157 if disk_abort:
9158 _RemoveDisks(self, iobj)
9159 self.cfg.RemoveInstance(iobj.name)
9160 # Make sure the instance lock gets removed
9161 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9162 raise errors.OpExecError("There are some degraded disks for"
9163 " this instance")
9165 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9166 if self.op.mode == constants.INSTANCE_CREATE:
9167 if not self.op.no_install:
9168 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9169 not self.op.wait_for_sync)
9170 if pause_sync:
9171 feedback_fn("* pausing disk sync to install instance OS")
9172 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9173 iobj.disks, True)
9174 for idx, success in enumerate(result.payload):
9175 if not success:
9176 logging.warn("pause-sync of instance %s for disk %d failed",
9177 instance, idx)
9179 feedback_fn("* running the instance OS create scripts...")
9180 # FIXME: pass debug option from opcode to backend
9181 os_add_result = \
9182 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9183 self.op.debug_level)
9184 if pause_sync:
9185 feedback_fn("* resuming disk sync")
9186 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9187 iobj.disks, False)
9188 for idx, success in enumerate(result.payload):
9189 if not success:
9190 logging.warn("resume-sync of instance %s for disk %d failed",
9191 instance, idx)
9193 os_add_result.Raise("Could not add os for instance %s"
9194 " on node %s" % (instance, pnode_name))
9196 elif self.op.mode == constants.INSTANCE_IMPORT:
9197 feedback_fn("* running the instance OS import scripts...")
9199 transfers = []
9201 for idx, image in enumerate(self.src_images):
9202 if not image:
9203 continue
9205 # FIXME: pass debug option from opcode to backend
9206 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9207 constants.IEIO_FILE, (image, ),
9208 constants.IEIO_SCRIPT,
9209 (iobj.disks[idx], idx),
9210 None)
9211 transfers.append(dt)
9213 import_result = \
9214 masterd.instance.TransferInstanceData(self, feedback_fn,
9215 self.op.src_node, pnode_name,
9216 self.pnode.secondary_ip,
9217 iobj, transfers)
9218 if not compat.all(import_result):
9219 self.LogWarning("Some disks for instance %s on node %s were not"
9220 " imported successfully" % (instance, pnode_name))
9222 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9223 feedback_fn("* preparing remote import...")
9224 # The source cluster will stop the instance before attempting to make a
9225 # connection. In some cases stopping an instance can take a long time,
9226 # hence the shutdown timeout is added to the connection timeout.
9227 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9228 self.op.source_shutdown_timeout)
9229 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9231 assert iobj.primary_node == self.pnode.name
9232 disk_results = \
9233 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9234 self.source_x509_ca,
9235 self._cds, timeouts)
9236 if not compat.all(disk_results):
9237 # TODO: Should the instance still be started, even if some disks
9238 # failed to import (valid for local imports, too)?
9239 self.LogWarning("Some disks for instance %s on node %s were not"
9240 " imported successfully" % (instance, pnode_name))
9242 # Run rename script on newly imported instance
9243 assert iobj.name == instance
9244 feedback_fn("Running rename script for %s" % instance)
9245 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9246 self.source_instance_name,
9247 self.op.debug_level)
9248 if result.fail_msg:
9249 self.LogWarning("Failed to run rename script for %s on node"
9250 " %s: %s" % (instance, pnode_name, result.fail_msg))
9252 else:
9253 # also checked in the prereq part
9254 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9255 % self.op.mode)
9257 if self.op.start:
9258 iobj.admin_up = True
9259 self.cfg.Update(iobj, feedback_fn)
9260 logging.info("Starting instance %s on node %s", instance, pnode_name)
9261 feedback_fn("* starting instance...")
9262 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9263 False)
9264 result.Raise("Could not start instance")
9266 return list(iobj.all_nodes)
9269 class LUInstanceConsole(NoHooksLU):
9270 """Connect to an instance's console.
9272 This is somewhat special in that it returns the command line that
9273 you need to run on the master node in order to connect to the
9274 console.
9279 def ExpandNames(self):
9280 self._ExpandAndLockInstance()
9282 def CheckPrereq(self):
9283 """Check prerequisites.
9285 This checks that the instance is in the cluster.
9288 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9289 assert self.instance is not None, \
9290 "Cannot retrieve locked instance %s" % self.op.instance_name
9291 _CheckNodeOnline(self, self.instance.primary_node)
9293 def Exec(self, feedback_fn):
9294 """Connect to the console of an instance
9297 instance = self.instance
9298 node = instance.primary_node
9300 node_insts = self.rpc.call_instance_list([node],
9301 [instance.hypervisor])[node]
9302 node_insts.Raise("Can't get node information from %s" % node)
9304 if instance.name not in node_insts.payload:
9305 if instance.admin_up:
9306 state = constants.INSTST_ERRORDOWN
9308 state = constants.INSTST_ADMINDOWN
9309 raise errors.OpExecError("Instance %s is not running (state %s)" %
9310 (instance.name, state))
9312 logging.debug("Connecting to console of %s on %s", instance.name, node)
9314 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9317 def _GetInstanceConsole(cluster, instance):
9318 """Returns console information for an instance.
9320 @type cluster: L{objects.Cluster}
9321 @type instance: L{objects.Instance}
9325 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9326 # beparams and hvparams are passed separately, to avoid editing the
9327 # instance and then saving the defaults in the instance itself.
9328 hvparams = cluster.FillHV(instance)
9329 beparams = cluster.FillBE(instance)
9330 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9332 assert console.instance == instance.name
9333 assert console.Validate()
9335 return console.ToDict()
9338 class LUInstanceReplaceDisks(LogicalUnit):
9339 """Replace the disks of an instance.
9342 HPATH = "mirrors-replace"
9343 HTYPE = constants.HTYPE_INSTANCE
9346 def CheckArguments(self):
9347 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9350 def ExpandNames(self):
9351 self._ExpandAndLockInstance()
9353 assert locking.LEVEL_NODE not in self.needed_locks
9354 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9356 assert self.op.iallocator is None or self.op.remote_node is None, \
9357 "Conflicting options"
9359 if self.op.remote_node is not None:
9360 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9362 # Warning: do not remove the locking of the new secondary here
9363 # unless DRBD8.AddChildren is changed to work in parallel;
9364 # currently it doesn't since parallel invocations of
9365 # FindUnusedMinor will conflict
9366 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9367 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9369 self.needed_locks[locking.LEVEL_NODE] = []
9370 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9372 if self.op.iallocator is not None:
9373 # iallocator will select a new node in the same group
9374 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9376 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9377 self.op.iallocator, self.op.remote_node,
9378 self.op.disks, False, self.op.early_release)
9380 self.tasklets = [self.replacer]
9382 def DeclareLocks(self, level):
9383 if level == locking.LEVEL_NODEGROUP:
9384 assert self.op.remote_node is None
9385 assert self.op.iallocator is not None
9386 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9388 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9389 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9390 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9392 elif level == locking.LEVEL_NODE:
9393 if self.op.iallocator is not None:
9394 assert self.op.remote_node is None
9395 assert not self.needed_locks[locking.LEVEL_NODE]
9397 # Lock member nodes of all locked groups
9398 self.needed_locks[locking.LEVEL_NODE] = [node_name
9399 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9400 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9402 self._LockInstancesNodes()
9404 def BuildHooksEnv(self):
9407 This runs on the master, the primary and all the secondaries.
9410 instance = self.replacer.instance
9412 "MODE": self.op.mode,
9413 "NEW_SECONDARY": self.op.remote_node,
9414 "OLD_SECONDARY": instance.secondary_nodes[0],
9416 env.update(_BuildInstanceHookEnvByObject(self, instance))
9419 def BuildHooksNodes(self):
9420 """Build hooks nodes.
9423 instance = self.replacer.instance
9425 self.cfg.GetMasterNode(),
9426 instance.primary_node,
9428 if self.op.remote_node is not None:
9429 nl.append(self.op.remote_node)
9432 def CheckPrereq(self):
9433 """Check prerequisites.
9436 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9437 self.op.iallocator is None)
9439 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9441 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9443 return LogicalUnit.CheckPrereq(self)
9446 class TLReplaceDisks(Tasklet):
9447 """Replaces disks for an instance.
9449 Note: Locking is not within the scope of this class.
9452 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9453 disks, delay_iallocator, early_release):
9454 """Initializes this class.
9457 Tasklet.__init__(self, lu)
9460 self.instance_name = instance_name
9462 self.iallocator_name = iallocator_name
9463 self.remote_node = remote_node
9465 self.delay_iallocator = delay_iallocator
9466 self.early_release = early_release
9469 self.instance = None
9470 self.new_node = None
9471 self.target_node = None
9472 self.other_node = None
9473 self.remote_node_info = None
9474 self.node_secondary_ip = None
9477 def CheckArguments(mode, remote_node, iallocator):
9478 """Helper function for users of this class.
9481 # check for valid parameter combination
9482 if mode == constants.REPLACE_DISK_CHG:
9483 if remote_node is None and iallocator is None:
9484 raise errors.OpPrereqError("When changing the secondary either an"
9485 " iallocator script must be used or the"
9486 " new node given", errors.ECODE_INVAL)
9488 if remote_node is not None and iallocator is not None:
9489 raise errors.OpPrereqError("Give either the iallocator or the new"
9490 " secondary, not both", errors.ECODE_INVAL)
9492 elif remote_node is not None or iallocator is not None:
9493 # Not replacing the secondary
9494 raise errors.OpPrereqError("The iallocator and new node options can"
9495 " only be used when changing the"
9496 " secondary node", errors.ECODE_INVAL)
9499 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9500 """Compute a new secondary node using an IAllocator.
9503 ial = IAllocator(lu.cfg, lu.rpc,
9504 mode=constants.IALLOCATOR_MODE_RELOC,
9506 relocate_from=list(relocate_from))
9508 ial.Run(iallocator_name)
9511 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9512 " %s" % (iallocator_name, ial.info),
9515 if len(ial.result) != ial.required_nodes:
9516 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9517 " of nodes (%s), required %s" %
9519 len(ial.result), ial.required_nodes),
9522 remote_node_name = ial.result[0]
9524 lu.LogInfo("Selected new secondary for instance '%s': %s",
9525 instance_name, remote_node_name)
9527 return remote_node_name
9529 def _FindFaultyDisks(self, node_name):
9530 """Wrapper for L{_FindFaultyInstanceDisks}.
9533 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9536 def _CheckDisksActivated(self, instance):
9537 """Checks if the instance disks are activated.
9539 @param instance: The instance to check disks
9540 @return: True if they are activated, False otherwise
9543 nodes = instance.all_nodes
9545 for idx, dev in enumerate(instance.disks):
9547 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9548 self.cfg.SetDiskID(dev, node)
9550 result = self.rpc.call_blockdev_find(node, dev)
9554 elif result.fail_msg or not result.payload:
9559 def CheckPrereq(self):
9560 """Check prerequisites.
9562 This checks that the instance is in the cluster.
9565 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9566 assert instance is not None, \
9567 "Cannot retrieve locked instance %s" % self.instance_name
9569 if instance.disk_template != constants.DT_DRBD8:
9570 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9571 " instances", errors.ECODE_INVAL)
9573 if len(instance.secondary_nodes) != 1:
9574 raise errors.OpPrereqError("The instance has a strange layout,"
9575 " expected one secondary but found %d" %
9576 len(instance.secondary_nodes),
9579 if not self.delay_iallocator:
9580 self._CheckPrereq2()
9582 def _CheckPrereq2(self):
9583 """Check prerequisites, second part.
9585 This function should always be part of CheckPrereq. It was separated and is
9586 now called from Exec because during node evacuation iallocator was only
9587 called with an unmodified cluster model, not taking planned changes into
9591 instance = self.instance
9592 secondary_node = instance.secondary_nodes[0]
9594 if self.iallocator_name is None:
9595 remote_node = self.remote_node
9597 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9598 instance.name, instance.secondary_nodes)
9600 if remote_node is None:
9601 self.remote_node_info = None
9603 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9604 "Remote node '%s' is not locked" % remote_node
9606 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9607 assert self.remote_node_info is not None, \
9608 "Cannot retrieve locked node %s" % remote_node
9610 if remote_node == self.instance.primary_node:
9611 raise errors.OpPrereqError("The specified node is the primary node of"
9612 " the instance", errors.ECODE_INVAL)
9614 if remote_node == secondary_node:
9615 raise errors.OpPrereqError("The specified node is already the"
9616 " secondary node of the instance",
9619 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9620 constants.REPLACE_DISK_CHG):
9621 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9624 if self.mode == constants.REPLACE_DISK_AUTO:
9625 if not self._CheckDisksActivated(instance):
9626 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9627 " first" % self.instance_name,
9629 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9630 faulty_secondary = self._FindFaultyDisks(secondary_node)
9632 if faulty_primary and faulty_secondary:
9633 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9634 " one node and can not be repaired"
9635 " automatically" % self.instance_name,
9639 self.disks = faulty_primary
9640 self.target_node = instance.primary_node
9641 self.other_node = secondary_node
9642 check_nodes = [self.target_node, self.other_node]
9643 elif faulty_secondary:
9644 self.disks = faulty_secondary
9645 self.target_node = secondary_node
9646 self.other_node = instance.primary_node
9647 check_nodes = [self.target_node, self.other_node]
9653 # Non-automatic modes
9654 if self.mode == constants.REPLACE_DISK_PRI:
9655 self.target_node = instance.primary_node
9656 self.other_node = secondary_node
9657 check_nodes = [self.target_node, self.other_node]
9659 elif self.mode == constants.REPLACE_DISK_SEC:
9660 self.target_node = secondary_node
9661 self.other_node = instance.primary_node
9662 check_nodes = [self.target_node, self.other_node]
9664 elif self.mode == constants.REPLACE_DISK_CHG:
9665 self.new_node = remote_node
9666 self.other_node = instance.primary_node
9667 self.target_node = secondary_node
9668 check_nodes = [self.new_node, self.other_node]
9670 _CheckNodeNotDrained(self.lu, remote_node)
9671 _CheckNodeVmCapable(self.lu, remote_node)
9673 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9674 assert old_node_info is not None
9675 if old_node_info.offline and not self.early_release:
9676 # doesn't make sense to delay the release
9677 self.early_release = True
9678 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9679 " early-release mode", secondary_node)
9682 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9685 # If not specified, all disks should be replaced
9687 self.disks = range(len(self.instance.disks))
9689 for node in check_nodes:
9690 _CheckNodeOnline(self.lu, node)
9692 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9695 if node_name is not None)
9697 # Release unneeded node locks
9698 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9700 # Release any owned node group
9701 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9702 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9704 # Check whether disks are valid
9705 for disk_idx in self.disks:
9706 instance.FindDisk(disk_idx)
9708 # Get secondary node IP addresses
9709 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9710 in self.cfg.GetMultiNodeInfo(touched_nodes))
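# node_secondary_ip maps node name -> secondary IP for every touched node; it
# is used both to verify the node locks held in Exec and as the address list
# for the DRBD network RPCs (disconnect/attach) when changing the secondary.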
9712 def Exec(self, feedback_fn):
9713 """Execute disk replacement.
9715 This dispatches the disk replacement to the appropriate handler.
9718 if self.delay_iallocator:
9719 self._CheckPrereq2()
9722 # Verify owned locks before starting operation
9723 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9724 assert set(owned_nodes) == set(self.node_secondary_ip), \
9725 ("Incorrect node locks, owning %s, expected %s" %
9726 (owned_nodes, self.node_secondary_ip.keys()))
9728 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9729 assert list(owned_instances) == [self.instance_name], \
9730 "Instance '%s' not locked" % self.instance_name
9732 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9733 "Should not own any node group lock at this point"
9736 feedback_fn("No disks need replacement")
9739 feedback_fn("Replacing disk(s) %s for %s" %
9740 (utils.CommaJoin(self.disks), self.instance.name))
9742 activate_disks = (not self.instance.admin_up)
9744 # Activate the instance disks if we're replacing them on a down instance
9746 _StartInstanceDisks(self.lu, self.instance, True)
9749 # Should we replace the secondary node?
9750 if self.new_node is not None:
9751 fn = self._ExecDrbd8Secondary
9753 fn = self._ExecDrbd8DiskOnly
9755 result = fn(feedback_fn)
9757 # Deactivate the instance disks if we're replacing them on a
9760 _SafeShutdownInstanceDisks(self.lu, self.instance)
9763 # Verify owned locks
9764 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9765 nodes = frozenset(self.node_secondary_ip)
9766 assert ((self.early_release and not owned_nodes) or
9767 (not self.early_release and not (set(owned_nodes) - nodes))), \
9768 ("Not owning the correct locks, early_release=%s, owned=%r,"
9769 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9773 def _CheckVolumeGroup(self, nodes):
9774 self.lu.LogInfo("Checking volume groups")
9776 vgname = self.cfg.GetVGName()
9778 # Make sure volume group exists on all involved nodes
9779 results = self.rpc.call_vg_list(nodes)
9781 raise errors.OpExecError("Can't list volume groups on the nodes")
9785 res.Raise("Error checking node %s" % node)
9786 if vgname not in res.payload:
9787 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9790 def _CheckDisksExistence(self, nodes):
9791 # Check disk existence
9792 for idx, dev in enumerate(self.instance.disks):
9793 if idx not in self.disks:
9797 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9798 self.cfg.SetDiskID(dev, node)
9800 result = self.rpc.call_blockdev_find(node, dev)
9802 msg = result.fail_msg
9803 if msg or not result.payload:
9805 msg = "disk not found"
9806 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9809 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9810 for idx, dev in enumerate(self.instance.disks):
9811 if idx not in self.disks:
9814 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9817 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9819 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9820 " replace disks for instance %s" %
9821 (node_name, self.instance.name))
9823 def _CreateNewStorage(self, node_name):
9824 """Create new storage on the primary or secondary node.
9826 This is only used for same-node replaces, not for changing the
9827 secondary node, hence we don't want to modify the existing disk.
9832 for idx, dev in enumerate(self.instance.disks):
9833 if idx not in self.disks:
9836 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9838 self.cfg.SetDiskID(dev, node_name)
9840 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9841 names = _GenerateUniqueNames(self.lu, lv_names)
9843 vg_data = dev.children[0].logical_id[0]
9844 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9845 logical_id=(vg_data, names[0]))
9846 vg_meta = dev.children[1].logical_id[0]
9847 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9848 logical_id=(vg_meta, names[1]))
9850 new_lvs = [lv_data, lv_meta]
9851 old_lvs = [child.Copy() for child in dev.children]
9852 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
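# iv_names maps the DRBD device's iv_name (e.g. "disk/0") to a tuple of
# (drbd disk object, old LV children, newly created LVs); the subsequent
# detach/rename/attach and cleanup steps all iterate over this mapping.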
9854 # we pass force_create=True to force the LVM creation
9855 for new_lv in new_lvs:
9856 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9857 _GetInstanceInfoText(self.instance), False)
9861 def _CheckDevices(self, node_name, iv_names):
9862 for name, (dev, _, _) in iv_names.iteritems():
9863 self.cfg.SetDiskID(dev, node_name)
9865 result = self.rpc.call_blockdev_find(node_name, dev)
9867 msg = result.fail_msg
9868 if msg or not result.payload:
9870 msg = "disk not found"
9871 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9874 if result.payload.is_degraded:
9875 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9877 def _RemoveOldStorage(self, node_name, iv_names):
9878 for name, (_, old_lvs, _) in iv_names.iteritems():
9879 self.lu.LogInfo("Remove logical volumes for %s" % name)
9882 self.cfg.SetDiskID(lv, node_name)
9884 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9886 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9887 hint="remove unused LVs manually")
9889 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9890 """Replace a disk on the primary or secondary for DRBD 8.
9892 The algorithm for replace is quite complicated:
9894 1. for each disk to be replaced:
9896 1. create new LVs on the target node with unique names
9897 1. detach old LVs from the drbd device
9898 1. rename old LVs to name_replaced.<time_t>
9899 1. rename new LVs to old LVs
9900 1. attach the new LVs (with the old names now) to the drbd device
9902 1. wait for sync across all devices
9904 1. for each modified disk:
9906 1. remove old LVs (which have the name name_replaces.<time_t>)
9908 Failures are not very well handled.
9913 # Step: check device activation
9914 self.lu.LogStep(1, steps_total, "Check device existence")
9915 self._CheckDisksExistence([self.other_node, self.target_node])
9916 self._CheckVolumeGroup([self.target_node, self.other_node])
9918 # Step: check other node consistency
9919 self.lu.LogStep(2, steps_total, "Check peer consistency")
9920 self._CheckDisksConsistency(self.other_node,
9921 self.other_node == self.instance.primary_node,
9924 # Step: create new storage
9925 self.lu.LogStep(3, steps_total, "Allocate new storage")
9926 iv_names = self._CreateNewStorage(self.target_node)
9928 # Step: for each lv, detach+rename*2+attach
9929 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9930 for dev, old_lvs, new_lvs in iv_names.itervalues():
9931 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9933 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9935 result.Raise("Can't detach drbd from local storage on node"
9936 " %s for device %s" % (self.target_node, dev.iv_name))
9938 #cfg.Update(instance)
9940 # ok, we created the new LVs, so now we know we have the needed
9941 # storage; as such, we proceed on the target node to rename
9942 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9943 # using the assumption that logical_id == physical_id (which in
9944 # turn is the unique_id on that node)
9946 # FIXME(iustin): use a better name for the replaced LVs
9947 temp_suffix = int(time.time())
9948 ren_fn = lambda d, suff: (d.physical_id[0],
9949 d.physical_id[1] + "_replaced-%s" % suff)
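# Illustrative example (LV names are only assumed here): an LV whose
# physical_id is ("xenvg", "<uuid>.disk0_data") would be renamed to
# ("xenvg", "<uuid>.disk0_data_replaced-1400000000") -- only the LV name
# changes, the volume group stays the same.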
9951 # Build the rename list based on what LVs exist on the node
9952 rename_old_to_new = []
9953 for to_ren in old_lvs:
9954 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9955 if not result.fail_msg and result.payload:
9957 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9959 self.lu.LogInfo("Renaming the old LVs on the target node")
9960 result = self.rpc.call_blockdev_rename(self.target_node,
9962 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9964 # Now we rename the new LVs to the old LVs
9965 self.lu.LogInfo("Renaming the new LVs on the target node")
9966 rename_new_to_old = [(new, old.physical_id)
9967 for old, new in zip(old_lvs, new_lvs)]
9968 result = self.rpc.call_blockdev_rename(self.target_node,
9970 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9972 # Intermediate steps of in memory modifications
9973 for old, new in zip(old_lvs, new_lvs):
9974 new.logical_id = old.logical_id
9975 self.cfg.SetDiskID(new, self.target_node)
9977 # We need to modify old_lvs so that removal later removes the
9978 # right LVs, not the newly added ones; note that old_lvs is a
9980 for disk in old_lvs:
9981 disk.logical_id = ren_fn(disk, temp_suffix)
9982 self.cfg.SetDiskID(disk, self.target_node)
9984 # Now that the new lvs have the old name, we can add them to the device
9985 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9986 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9988 msg = result.fail_msg
9990 for new_lv in new_lvs:
9991 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9994 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9995 hint=("cleanup manually the unused logical"
9997 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10000 if self.early_release:
10001 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10003 self._RemoveOldStorage(self.target_node, iv_names)
10004 # WARNING: we release both node locks here, do not do other RPCs
10005 # than WaitForSync to the primary node
10006 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10007 names=[self.target_node, self.other_node])
10010 # This can fail as the old devices are degraded and _WaitForSync
10011 # only reports a combined result over all disks, so we don't check its return value
10012 self.lu.LogStep(cstep, steps_total, "Sync devices")
10014 _WaitForSync(self.lu, self.instance)
10016 # Check all devices manually
10017 self._CheckDevices(self.instance.primary_node, iv_names)
10019 # Step: remove old storage
10020 if not self.early_release:
10021 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10023 self._RemoveOldStorage(self.target_node, iv_names)
10025 def _ExecDrbd8Secondary(self, feedback_fn):
10026 """Replace the secondary node for DRBD 8.
10028 The algorithm for replace is quite complicated:
10029 - for all disks of the instance:
10030 - create new LVs on the new node with same names
10031 - shutdown the drbd device on the old secondary
10032 - disconnect the drbd network on the primary
10033 - create the drbd device on the new secondary
10034 - network attach the drbd on the primary, using an artifice:
10035 the drbd code for Attach() will connect to the network if it
10036 finds a device which is connected to the good local disks but
10037 not network enabled
10038 - wait for sync across all devices
10039 - remove all disks from the old secondary
10041 Failures are not very well handled.
10046 pnode = self.instance.primary_node
10048 # Step: check device activation
10049 self.lu.LogStep(1, steps_total, "Check device existence")
10050 self._CheckDisksExistence([self.instance.primary_node])
10051 self._CheckVolumeGroup([self.instance.primary_node])
10053 # Step: check other node consistency
10054 self.lu.LogStep(2, steps_total, "Check peer consistency")
10055 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10057 # Step: create new storage
10058 self.lu.LogStep(3, steps_total, "Allocate new storage")
10059 for idx, dev in enumerate(self.instance.disks):
10060 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10061 (self.new_node, idx))
10062 # we pass force_create=True to force LVM creation
10063 for new_lv in dev.children:
10064 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10065 _GetInstanceInfoText(self.instance), False)
10067 # Step 4: drbd minors and drbd setup changes
10068 # after this, we must manually remove the drbd minors on both the
10069 # error and the success paths
10070 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10071 minors = self.cfg.AllocateDRBDMinor([self.new_node
10072 for dev in self.instance.disks],
10073 self.instance.name)
10074 logging.debug("Allocated minors %r", minors)
10077 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10078 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10079 (self.new_node, idx))
10080 # create new devices on new_node; note that we create two IDs:
10081 # one without port, so the drbd will be activated without
10082 # networking information on the new node at this stage, and one
10083 # with network, for the latter activation in step 4
10084 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10085 if self.instance.primary_node == o_node1:
10088 assert self.instance.primary_node == o_node2, "Three-node instance?"
10091 new_alone_id = (self.instance.primary_node, self.new_node, None,
10092 p_minor, new_minor, o_secret)
10093 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10094 p_minor, new_minor, o_secret)
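# DRBD8 logical_id layout: (node_A, node_B, port, minor_A, minor_B, secret).
# new_alone_id deliberately carries no port, so the device comes up on the
# new node without networking; new_net_id keeps the original port and is what
# gets stored in the configuration for the later re-attach on the primary.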
10096 iv_names[idx] = (dev, dev.children, new_net_id)
10097 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10099 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10100 logical_id=new_alone_id,
10101 children=dev.children,
10104 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10105 _GetInstanceInfoText(self.instance), False)
10106 except errors.GenericError:
10107 self.cfg.ReleaseDRBDMinors(self.instance.name)
10110 # We have new devices, shutdown the drbd on the old secondary
10111 for idx, dev in enumerate(self.instance.disks):
10112 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10113 self.cfg.SetDiskID(dev, self.target_node)
10114 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10116 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10117 "node: %s" % (idx, msg),
10118 hint=("Please cleanup this device manually as"
10119 " soon as possible"))
10121 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10122 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10123 self.instance.disks)[pnode]
10125 msg = result.fail_msg
10127 # detaches didn't succeed (unlikely)
10128 self.cfg.ReleaseDRBDMinors(self.instance.name)
10129 raise errors.OpExecError("Can't detach the disks from the network on"
10130 " old node: %s" % (msg,))
10132 # if we managed to detach at least one, we update all the disks of
10133 # the instance to point to the new secondary
10134 self.lu.LogInfo("Updating instance configuration")
10135 for dev, _, new_logical_id in iv_names.itervalues():
10136 dev.logical_id = new_logical_id
10137 self.cfg.SetDiskID(dev, self.instance.primary_node)
10139 self.cfg.Update(self.instance, feedback_fn)
10141 # and now perform the drbd attach
10142 self.lu.LogInfo("Attaching primary drbds to new secondary"
10143 " (standalone => connected)")
10144 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10146 self.node_secondary_ip,
10147 self.instance.disks,
10148 self.instance.name,
10150 for to_node, to_result in result.items():
10151 msg = to_result.fail_msg
10153 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10155 hint=("please do a gnt-instance info to see the"
10156 " status of disks"))
10158 if self.early_release:
10159 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10161 self._RemoveOldStorage(self.target_node, iv_names)
10162 # WARNING: we release all node locks here, do not do other RPCs
10163 # than WaitForSync to the primary node
10164 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10165 names=[self.instance.primary_node,
10170 # This can fail as the old devices are degraded and _WaitForSync
10171 # only reports a combined result over all disks, so we don't check its return value
10172 self.lu.LogStep(cstep, steps_total, "Sync devices")
10174 _WaitForSync(self.lu, self.instance)
10176 # Check all devices manually
10177 self._CheckDevices(self.instance.primary_node, iv_names)
10179 # Step: remove old storage
10180 if not self.early_release:
10181 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10182 self._RemoveOldStorage(self.target_node, iv_names)
10185 class LURepairNodeStorage(NoHooksLU):
10186 """Repairs the volume group on a node.
10191 def CheckArguments(self):
10192 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10194 storage_type = self.op.storage_type
10196 if (constants.SO_FIX_CONSISTENCY not in
10197 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10198 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10199 " repaired" % storage_type,
10200 errors.ECODE_INVAL)
10202 def ExpandNames(self):
10203 self.needed_locks = {
10204 locking.LEVEL_NODE: [self.op.node_name],
10207 def _CheckFaultyDisks(self, instance, node_name):
10208 """Ensure faulty disks abort the opcode or at least warn."""
10210 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10212 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10213 " node '%s'" % (instance.name, node_name),
10214 errors.ECODE_STATE)
10215 except errors.OpPrereqError, err:
10216 if self.op.ignore_consistency:
10217 self.proc.LogWarning(str(err.args[0]))
10221 def CheckPrereq(self):
10222 """Check prerequisites.
10225 # Check whether any instance on this node has faulty disks
10226 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10227 if not inst.admin_up:
10229 check_nodes = set(inst.all_nodes)
10230 check_nodes.discard(self.op.node_name)
10231 for inst_node_name in check_nodes:
10232 self._CheckFaultyDisks(inst, inst_node_name)
10234 def Exec(self, feedback_fn):
10235 feedback_fn("Repairing storage unit '%s' on %s ..." %
10236 (self.op.name, self.op.node_name))
10238 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10239 result = self.rpc.call_storage_execute(self.op.node_name,
10240 self.op.storage_type, st_args,
10242 constants.SO_FIX_CONSISTENCY)
10243 result.Raise("Failed to repair storage unit '%s' on %s" %
10244 (self.op.name, self.op.node_name))
10247 class LUNodeEvacuate(NoHooksLU):
10248 """Evacuates instances off a list of nodes.
10253 def CheckArguments(self):
10254 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10256 def ExpandNames(self):
10257 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10259 if self.op.remote_node is not None:
10260 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10261 assert self.op.remote_node
10263 if self.op.remote_node == self.op.node_name:
10264 raise errors.OpPrereqError("Can not use evacuated node as a new"
10265 " secondary node", errors.ECODE_INVAL)
10267 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10268 raise errors.OpPrereqError("Without the use of an iallocator only"
10269 " secondary instances can be evacuated",
10270 errors.ECODE_INVAL)
10273 self.share_locks = _ShareAll()
10274 self.needed_locks = {
10275 locking.LEVEL_INSTANCE: [],
10276 locking.LEVEL_NODEGROUP: [],
10277 locking.LEVEL_NODE: [],
10280 if self.op.remote_node is None:
10281 # Iallocator will choose any node(s) in the same group
10282 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10284 group_nodes = frozenset([self.op.remote_node])
10286 # Determine nodes to be locked
10287 self.lock_nodes = set([self.op.node_name]) | group_nodes
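# lock_nodes thus contains the evacuated node plus either the single chosen
# remote node or all nodes of the evacuated node's group(s), since an
# iallocator may pick any node in the same group as the new secondary.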
10289 def _DetermineInstances(self):
10290 """Builds list of instances to operate on.
10293 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10295 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10296 # Primary instances only
10297 inst_fn = _GetNodePrimaryInstances
10298 assert self.op.remote_node is None, \
10299 "Evacuating primary instances requires iallocator"
10300 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10301 # Secondary instances only
10302 inst_fn = _GetNodeSecondaryInstances
10305 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10306 inst_fn = _GetNodeInstances
10308 return inst_fn(self.cfg, self.op.node_name)
10310 def DeclareLocks(self, level):
10311 if level == locking.LEVEL_INSTANCE:
10312 # Lock instances optimistically, needs verification once node and group
10313 # locks have been acquired
10314 self.needed_locks[locking.LEVEL_INSTANCE] = \
10315 set(i.name for i in self._DetermineInstances())
10317 elif level == locking.LEVEL_NODEGROUP:
10318 # Lock node groups optimistically, needs verification once nodes have
10320 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10321 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10323 elif level == locking.LEVEL_NODE:
10324 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10326 def CheckPrereq(self):
10328 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10329 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10330 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10332 assert owned_nodes == self.lock_nodes
10334 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10335 if owned_groups != wanted_groups:
10336 raise errors.OpExecError("Node groups changed since locks were acquired,"
10337 " current groups are '%s', used to be '%s'" %
10338 (utils.CommaJoin(wanted_groups),
10339 utils.CommaJoin(owned_groups)))
10341 # Determine affected instances
10342 self.instances = self._DetermineInstances()
10343 self.instance_names = [i.name for i in self.instances]
10345 if set(self.instance_names) != owned_instances:
10346 raise errors.OpExecError("Instances on node '%s' changed since locks"
10347 " were acquired, current instances are '%s',"
10348 " used to be '%s'" %
10349 (self.op.node_name,
10350 utils.CommaJoin(self.instance_names),
10351 utils.CommaJoin(owned_instances)))
10353 if self.instance_names:
10354 self.LogInfo("Evacuating instances from node '%s': %s",
10356 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10358 self.LogInfo("No instances to evacuate from node '%s'",
10361 if self.op.remote_node is not None:
10362 for i in self.instances:
10363 if i.primary_node == self.op.remote_node:
10364 raise errors.OpPrereqError("Node %s is the primary node of"
10365 " instance %s, cannot use it as"
10367 (self.op.remote_node, i.name),
10368 errors.ECODE_INVAL)
10370 def Exec(self, feedback_fn):
10371 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10373 if not self.instance_names:
10374 # No instances to evacuate
10377 elif self.op.iallocator is not None:
10378 # TODO: Implement relocation to other group
10379 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10380 evac_mode=self.op.mode,
10381 instances=list(self.instance_names))
10383 ial.Run(self.op.iallocator)
10385 if not ial.success:
10386 raise errors.OpPrereqError("Can't compute node evacuation using"
10387 " iallocator '%s': %s" %
10388 (self.op.iallocator, ial.info),
10389 errors.ECODE_NORES)
10391 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10393 elif self.op.remote_node is not None:
10394 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10396 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10397 remote_node=self.op.remote_node,
10399 mode=constants.REPLACE_DISK_CHG,
10400 early_release=self.op.early_release)]
10401 for instance_name in self.instance_names
10405 raise errors.ProgrammerError("No iallocator or remote node")
10407 return ResultWithJobs(jobs)
10410 def _SetOpEarlyRelease(early_release, op):
10411 """Sets C{early_release} flag on opcodes if available.
10415 op.early_release = early_release
10416 except AttributeError:
10417 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10422 def _NodeEvacDest(use_nodes, group, nodes):
10423 """Returns group or nodes depending on caller's choice.
10427 return utils.CommaJoin(nodes)
10432 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10433 """Unpacks the result of change-group and node-evacuate iallocator requests.
10435 Used for the iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10436 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10438 @type lu: L{LogicalUnit}
10439 @param lu: Logical unit instance
10440 @type alloc_result: tuple/list
10441 @param alloc_result: Result from iallocator
10442 @type early_release: bool
10443 @param early_release: Whether to release locks early if possible
10444 @type use_nodes: bool
10445 @param use_nodes: Whether to display node names instead of groups
10448 (moved, failed, jobs) = alloc_result
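# alloc_result layout: moved is a list of (instance, target group, target
# nodes) tuples, failed a list of (instance, reason) tuples, and jobs a list
# of job definitions, each a list of serialized opcodes that are loaded via
# OpCode.LoadOpCode below.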
10451 lu.LogWarning("Unable to evacuate instances %s",
10452 utils.CommaJoin("%s (%s)" % (name, reason)
10453 for (name, reason) in failed))
10456 lu.LogInfo("Instances to be moved: %s",
10457 utils.CommaJoin("%s (to %s)" %
10458 (name, _NodeEvacDest(use_nodes, group, nodes))
10459 for (name, group, nodes) in moved))
10461 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10462 map(opcodes.OpCode.LoadOpCode, ops))
10466 class LUInstanceGrowDisk(LogicalUnit):
10467 """Grow a disk of an instance.
10470 HPATH = "disk-grow"
10471 HTYPE = constants.HTYPE_INSTANCE
10474 def ExpandNames(self):
10475 self._ExpandAndLockInstance()
10476 self.needed_locks[locking.LEVEL_NODE] = []
10477 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10479 def DeclareLocks(self, level):
10480 if level == locking.LEVEL_NODE:
10481 self._LockInstancesNodes()
10483 def BuildHooksEnv(self):
10484 """Build hooks env.
10486 This runs on the master, the primary and all the secondaries.
10490 "DISK": self.op.disk,
10491 "AMOUNT": self.op.amount,
10493 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10496 def BuildHooksNodes(self):
10497 """Build hooks nodes.
10500 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10503 def CheckPrereq(self):
10504 """Check prerequisites.
10506 This checks that the instance is in the cluster.
10509 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10510 assert instance is not None, \
10511 "Cannot retrieve locked instance %s" % self.op.instance_name
10512 nodenames = list(instance.all_nodes)
10513 for node in nodenames:
10514 _CheckNodeOnline(self, node)
10516 self.instance = instance
10518 if instance.disk_template not in constants.DTS_GROWABLE:
10519 raise errors.OpPrereqError("Instance's disk layout does not support"
10520 " growing", errors.ECODE_INVAL)
10522 self.disk = instance.FindDisk(self.op.disk)
10524 if instance.disk_template not in (constants.DT_FILE,
10525 constants.DT_SHARED_FILE):
10526 # TODO: check the free disk space for file, when that feature will be
10528 _CheckNodesFreeDiskPerVG(self, nodenames,
10529 self.disk.ComputeGrowth(self.op.amount))
10531 def Exec(self, feedback_fn):
10532 """Execute disk grow.
10535 instance = self.instance
10538 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10540 raise errors.OpExecError("Cannot activate block device to grow")
10542 # First run all grow ops in dry-run mode
10543 for node in instance.all_nodes:
10544 self.cfg.SetDiskID(disk, node)
10545 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10546 result.Raise("Grow request failed to node %s" % node)
10548 # We know that (as far as we can test) operations across different
10549 # nodes will succeed, time to run it for real
10550 for node in instance.all_nodes:
10551 self.cfg.SetDiskID(disk, node)
10552 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10553 result.Raise("Grow request failed to node %s" % node)
10555 # TODO: Rewrite code to work properly
10556 # DRBD goes into sync mode for a short amount of time after executing the
10557 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10558 # calling "resize" in sync mode fails. Sleeping for a short amount of
10559 # time is a work-around.
10562 disk.RecordGrow(self.op.amount)
10563 self.cfg.Update(instance, feedback_fn)
10564 if self.op.wait_for_sync:
10565 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10567 self.proc.LogWarning("Disk sync-ing has not returned a good"
10568 " status; please check the instance")
10569 if not instance.admin_up:
10570 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10571 elif not instance.admin_up:
10572 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10573 " not supposed to be running because no wait for"
10574 " sync mode was requested")
10577 class LUInstanceQueryData(NoHooksLU):
10578 """Query runtime instance data.
10583 def ExpandNames(self):
10584 self.needed_locks = {}
10586 # Use locking if requested or when non-static information is wanted
10587 if not (self.op.static or self.op.use_locking):
10588 self.LogWarning("Non-static data requested, locks need to be acquired")
10589 self.op.use_locking = True
10591 if self.op.instances or not self.op.use_locking:
10592 # Expand instance names right here
10593 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10595 # Will use acquired locks
10596 self.wanted_names = None
10598 if self.op.use_locking:
10599 self.share_locks = _ShareAll()
10601 if self.wanted_names is None:
10602 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10604 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10606 self.needed_locks[locking.LEVEL_NODE] = []
10607 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10609 def DeclareLocks(self, level):
10610 if self.op.use_locking and level == locking.LEVEL_NODE:
10611 self._LockInstancesNodes()
10613 def CheckPrereq(self):
10614 """Check prerequisites.
10616 This only checks the optional instance list against the existing names.
10619 if self.wanted_names is None:
10620 assert self.op.use_locking, "Locking was not used"
10621 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10623 self.wanted_instances = \
10624 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10626 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10627 """Returns the status of a block device
10630 if self.op.static or not node:
10633 self.cfg.SetDiskID(dev, node)
10635 result = self.rpc.call_blockdev_find(node, dev)
10639 result.Raise("Can't compute disk status for %s" % instance_name)
10641 status = result.payload
10645 return (status.dev_path, status.major, status.minor,
10646 status.sync_percent, status.estimated_time,
10647 status.is_degraded, status.ldisk_status)
10649 def _ComputeDiskStatus(self, instance, snode, dev):
10650 """Compute block device status.
10653 if dev.dev_type in constants.LDS_DRBD:
10654 # for DRBD devices, derive the secondary node from the logical_id (otherwise use the snode passed in)
10655 if dev.logical_id[0] == instance.primary_node:
10656 snode = dev.logical_id[1]
10658 snode = dev.logical_id[0]
10660 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10661 instance.name, dev)
10662 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10665 dev_children = map(compat.partial(self._ComputeDiskStatus,
10672 "iv_name": dev.iv_name,
10673 "dev_type": dev.dev_type,
10674 "logical_id": dev.logical_id,
10675 "physical_id": dev.physical_id,
10676 "pstatus": dev_pstatus,
10677 "sstatus": dev_sstatus,
10678 "children": dev_children,
10683 def Exec(self, feedback_fn):
10684 """Gather and return data"""
10687 cluster = self.cfg.GetClusterInfo()
10689 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10690 for i in self.wanted_instances)
10691 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10692 if self.op.static or pnode.offline:
10693 remote_state = None
10695 self.LogWarning("Primary node %s is marked offline, returning static"
10696 " information only for instance %s" %
10697 (pnode.name, instance.name))
10699 remote_info = self.rpc.call_instance_info(instance.primary_node,
10701 instance.hypervisor)
10702 remote_info.Raise("Error checking node %s" % instance.primary_node)
10703 remote_info = remote_info.payload
10704 if remote_info and "state" in remote_info:
10705 remote_state = "up"
10707 remote_state = "down"
10709 if instance.admin_up:
10710 config_state = "up"
10712 config_state = "down"
10714 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10717 result[instance.name] = {
10718 "name": instance.name,
10719 "config_state": config_state,
10720 "run_state": remote_state,
10721 "pnode": instance.primary_node,
10722 "snodes": instance.secondary_nodes,
10724 # this happens to be the same format used for hooks
10725 "nics": _NICListToTuple(self, instance.nics),
10726 "disk_template": instance.disk_template,
10728 "hypervisor": instance.hypervisor,
10729 "network_port": instance.network_port,
10730 "hv_instance": instance.hvparams,
10731 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10732 "be_instance": instance.beparams,
10733 "be_actual": cluster.FillBE(instance),
10734 "os_instance": instance.osparams,
10735 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10736 "serial_no": instance.serial_no,
10737 "mtime": instance.mtime,
10738 "ctime": instance.ctime,
10739 "uuid": instance.uuid,
10745 class LUInstanceSetParams(LogicalUnit):
10746 """Modifies an instances's parameters.
10749 HPATH = "instance-modify"
10750 HTYPE = constants.HTYPE_INSTANCE
10753 def CheckArguments(self):
10754 if not (self.op.nics or self.op.disks or self.op.disk_template or
10755 self.op.hvparams or self.op.beparams or self.op.os_name):
10756 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10758 if self.op.hvparams:
10759 _CheckGlobalHvParams(self.op.hvparams)
10763 for disk_op, disk_dict in self.op.disks:
10764 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10765 if disk_op == constants.DDM_REMOVE:
10766 disk_addremove += 1
10768 elif disk_op == constants.DDM_ADD:
10769 disk_addremove += 1
10771 if not isinstance(disk_op, int):
10772 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10773 if not isinstance(disk_dict, dict):
10774 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10775 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10777 if disk_op == constants.DDM_ADD:
10778 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10779 if mode not in constants.DISK_ACCESS_SET:
10780 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10781 errors.ECODE_INVAL)
10782 size = disk_dict.get(constants.IDISK_SIZE, None)
10784 raise errors.OpPrereqError("Required disk parameter size missing",
10785 errors.ECODE_INVAL)
10788 except (TypeError, ValueError), err:
10789 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10790 str(err), errors.ECODE_INVAL)
10791 disk_dict[constants.IDISK_SIZE] = size
10793 # modification of disk
10794 if constants.IDISK_SIZE in disk_dict:
10795 raise errors.OpPrereqError("Disk size change not possible, use"
10796 " grow-disk", errors.ECODE_INVAL)
10798 if disk_addremove > 1:
10799 raise errors.OpPrereqError("Only one disk add or remove operation"
10800 " supported at a time", errors.ECODE_INVAL)
10802 if self.op.disks and self.op.disk_template is not None:
10803 raise errors.OpPrereqError("Disk template conversion and other disk"
10804 " changes not supported at the same time",
10805 errors.ECODE_INVAL)
10807 if (self.op.disk_template and
10808 self.op.disk_template in constants.DTS_INT_MIRROR and
10809 self.op.remote_node is None):
10810 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10811 " one requires specifying a secondary node",
10812 errors.ECODE_INVAL)
10816 for nic_op, nic_dict in self.op.nics:
10817 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10818 if nic_op == constants.DDM_REMOVE:
10821 elif nic_op == constants.DDM_ADD:
10824 if not isinstance(nic_op, int):
10825 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10826 if not isinstance(nic_dict, dict):
10827 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10828 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10830 # nic_dict should be a dict
10831 nic_ip = nic_dict.get(constants.INIC_IP, None)
10832 if nic_ip is not None:
10833 if nic_ip.lower() == constants.VALUE_NONE:
10834 nic_dict[constants.INIC_IP] = None
10836 if not netutils.IPAddress.IsValid(nic_ip):
10837 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10838 errors.ECODE_INVAL)
10840 nic_bridge = nic_dict.get("bridge", None)
10841 nic_link = nic_dict.get(constants.INIC_LINK, None)
10842 if nic_bridge and nic_link:
10843 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10844 " at the same time", errors.ECODE_INVAL)
10845 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10846 nic_dict["bridge"] = None
10847 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10848 nic_dict[constants.INIC_LINK] = None
10850 if nic_op == constants.DDM_ADD:
10851 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10852 if nic_mac is None:
10853 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10855 if constants.INIC_MAC in nic_dict:
10856 nic_mac = nic_dict[constants.INIC_MAC]
10857 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10858 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10860 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10861 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10862 " modifying an existing nic",
10863 errors.ECODE_INVAL)
10865 if nic_addremove > 1:
10866 raise errors.OpPrereqError("Only one NIC add or remove operation"
10867 " supported at a time", errors.ECODE_INVAL)
10869 def ExpandNames(self):
10870 self._ExpandAndLockInstance()
10871 self.needed_locks[locking.LEVEL_NODE] = []
10872 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10874 def DeclareLocks(self, level):
10875 if level == locking.LEVEL_NODE:
10876 self._LockInstancesNodes()
10877 if self.op.disk_template and self.op.remote_node:
10878 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10879 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10881 def BuildHooksEnv(self):
10882 """Build hooks env.
10884 This runs on the master, primary and secondaries.
10888 if constants.BE_MEMORY in self.be_new:
10889 args["memory"] = self.be_new[constants.BE_MEMORY]
10890 if constants.BE_VCPUS in self.be_new:
10891 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10892 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10893 # information at all.
10896 nic_override = dict(self.op.nics)
10897 for idx, nic in enumerate(self.instance.nics):
10898 if idx in nic_override:
10899 this_nic_override = nic_override[idx]
10901 this_nic_override = {}
10902 if constants.INIC_IP in this_nic_override:
10903 ip = this_nic_override[constants.INIC_IP]
10906 if constants.INIC_MAC in this_nic_override:
10907 mac = this_nic_override[constants.INIC_MAC]
10910 if idx in self.nic_pnew:
10911 nicparams = self.nic_pnew[idx]
10913 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10914 mode = nicparams[constants.NIC_MODE]
10915 link = nicparams[constants.NIC_LINK]
10916 args["nics"].append((ip, mac, mode, link))
10917 if constants.DDM_ADD in nic_override:
10918 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10919 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10920 nicparams = self.nic_pnew[constants.DDM_ADD]
10921 mode = nicparams[constants.NIC_MODE]
10922 link = nicparams[constants.NIC_LINK]
10923 args["nics"].append((ip, mac, mode, link))
10924 elif constants.DDM_REMOVE in nic_override:
10925 del args["nics"][-1]
10927 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10928 if self.op.disk_template:
10929 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10933 def BuildHooksNodes(self):
10934 """Build hooks nodes.
10937 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10940 def CheckPrereq(self):
10941 """Check prerequisites.
10943 This only checks the instance list against the existing names.
10946 # checking the new params on the primary/secondary nodes
10948 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10949 cluster = self.cluster = self.cfg.GetClusterInfo()
10950 assert self.instance is not None, \
10951 "Cannot retrieve locked instance %s" % self.op.instance_name
10952 pnode = instance.primary_node
10953 nodelist = list(instance.all_nodes)
10956 if self.op.os_name and not self.op.force:
10957 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10958 self.op.force_variant)
10959 instance_os = self.op.os_name
10961 instance_os = instance.os
10963 if self.op.disk_template:
10964 if instance.disk_template == self.op.disk_template:
10965 raise errors.OpPrereqError("Instance already has disk template %s" %
10966 instance.disk_template, errors.ECODE_INVAL)
10968 if (instance.disk_template,
10969 self.op.disk_template) not in self._DISK_CONVERSIONS:
10970 raise errors.OpPrereqError("Unsupported disk template conversion from"
10971 " %s to %s" % (instance.disk_template,
10972 self.op.disk_template),
10973 errors.ECODE_INVAL)
10974 _CheckInstanceDown(self, instance, "cannot change disk template")
10975 if self.op.disk_template in constants.DTS_INT_MIRROR:
10976 if self.op.remote_node == pnode:
10977 raise errors.OpPrereqError("Given new secondary node %s is the same"
10978 " as the primary node of the instance" %
10979 self.op.remote_node, errors.ECODE_STATE)
10980 _CheckNodeOnline(self, self.op.remote_node)
10981 _CheckNodeNotDrained(self, self.op.remote_node)
10982 # FIXME: here we assume that the old instance type is DT_PLAIN
10983 assert instance.disk_template == constants.DT_PLAIN
10984 disks = [{constants.IDISK_SIZE: d.size,
10985 constants.IDISK_VG: d.logical_id[0]}
10986 for d in instance.disks]
10987 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10988 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10990 # hvparams processing
10991 if self.op.hvparams:
10992 hv_type = instance.hypervisor
10993 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10994 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10995 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10998 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10999 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11000 self.hv_proposed = self.hv_new = hv_new # the new actual values
11001 self.hv_inst = i_hvdict # the new dict (without defaults)
11003 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11005 self.hv_new = self.hv_inst = {}
11007 # beparams processing
11008 if self.op.beparams:
11009 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11011 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11012 be_new = cluster.SimpleFillBE(i_bedict)
11013 self.be_proposed = self.be_new = be_new # the new actual values
11014 self.be_inst = i_bedict # the new dict (without defaults)
11016 self.be_new = self.be_inst = {}
11017 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11018 be_old = cluster.FillBE(instance)
11020 # CPU param validation -- checking every time a parameter is
11021 # changed to cover all cases where either CPU mask or vcpus have
11023 if (constants.BE_VCPUS in self.be_proposed and
11024 constants.HV_CPU_MASK in self.hv_proposed):
11026 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11027 # Verify mask is consistent with number of vCPUs. Can skip this
11028 # test if only 1 entry in the CPU mask, which means same mask
11029 # is applied to all vCPUs.
11030 if (len(cpu_list) > 1 and
11031 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11032 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11034 (self.be_proposed[constants.BE_VCPUS],
11035 self.hv_proposed[constants.HV_CPU_MASK]),
11036 errors.ECODE_INVAL)
11038 # Only perform this test if a new CPU mask is given
11039 if constants.HV_CPU_MASK in self.hv_new:
11040 # Calculate the largest CPU number requested
11041 max_requested_cpu = max(map(max, cpu_list))
11042 # Check that all of the instance's nodes have enough physical CPUs to
11043 # satisfy the requested CPU mask
11044 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11045 max_requested_cpu + 1, instance.hypervisor)
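# Illustrative example (mask syntax as assumed here): a mask such as
# "0:2-3:5" yields three per-vCPU lists ([0], [2, 3], [5]); with
# BE_VCPUS == 3 the lengths match, max_requested_cpu is 5, and every node
# must therefore expose at least 6 physical CPUs.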
11047 # osparams processing
11048 if self.op.osparams:
11049 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11050 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11051 self.os_inst = i_osdict # the new dict (without defaults)
11057 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11058 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11059 mem_check_list = [pnode]
11060 if be_new[constants.BE_AUTO_BALANCE]:
11061 # either we changed auto_balance to yes or it was from before
11062 mem_check_list.extend(instance.secondary_nodes)
11063 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11064 instance.hypervisor)
11065 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11066 instance.hypervisor)
11067 pninfo = nodeinfo[pnode]
11068 msg = pninfo.fail_msg
11070 # Assume the primary node is unreachable and go ahead
11071 self.warn.append("Can't get info from primary node %s: %s" %
11073 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11074 self.warn.append("Node data from primary node %s doesn't contain"
11075 " free memory information" % pnode)
11076 elif instance_info.fail_msg:
11077 self.warn.append("Can't get instance runtime information: %s" %
11078 instance_info.fail_msg)
11080 if instance_info.payload:
11081 current_mem = int(instance_info.payload["memory"])
11082 else:
11083 # Assume instance not running
11084 # (there is a slight race condition here, but it's not very probable,
11085 # and we have no other way to check)
11086 current_mem = 0
11087 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11088 pninfo.payload["memory_free"])
11089 if miss_mem > 0:
11090 raise errors.OpPrereqError("This change will prevent the instance"
11091 " from starting, due to %d MB of memory"
11092 " missing on its primary node" % miss_mem,
11093 errors.ECODE_NORES)
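# Worked example (made-up numbers): raising BE_MEMORY to 4096 MB while the
# instance currently uses 1024 MB and the primary node reports 2048 MB free
# gives miss_mem = 4096 - 1024 - 2048 = 1024 MB, so the change is refused.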
11095 if be_new[constants.BE_AUTO_BALANCE]:
11096 for node, nres in nodeinfo.items():
11097 if node not in instance.secondary_nodes:
11098 continue
11099 nres.Raise("Can't get info from secondary node %s" % node,
11100 prereq=True, ecode=errors.ECODE_STATE)
11101 if not isinstance(nres.payload.get("memory_free", None), int):
11102 raise errors.OpPrereqError("Secondary node %s didn't return free"
11103 " memory information" % node,
11104 errors.ECODE_STATE)
11105 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11106 raise errors.OpPrereqError("This change will prevent the instance"
11107 " from failover to its secondary node"
11108 " %s, due to not enough memory" % node,
11109 errors.ECODE_STATE)
11113 self.nic_pinst = {}
11114 for nic_op, nic_dict in self.op.nics:
11115 if nic_op == constants.DDM_REMOVE:
11116 if not instance.nics:
11117 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11118 errors.ECODE_INVAL)
11120 if nic_op != constants.DDM_ADD:
11122 if not instance.nics:
11123 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11124 " no NICs" % nic_op,
11125 errors.ECODE_INVAL)
11126 if nic_op < 0 or nic_op >= len(instance.nics):
11127 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11128 " are 0 to %d" %
11129 (nic_op, len(instance.nics) - 1),
11130 errors.ECODE_INVAL)
11131 old_nic_params = instance.nics[nic_op].nicparams
11132 old_nic_ip = instance.nics[nic_op].ip
11133 else:
11134 old_nic_params = {}
11135 old_nic_ip = None
11137 update_params_dict = dict([(key, nic_dict[key])
11138 for key in constants.NICS_PARAMETERS
11139 if key in nic_dict])
11141 if "bridge" in nic_dict:
11142 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11144 new_nic_params = _GetUpdatedParams(old_nic_params,
11145 update_params_dict)
11146 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11147 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11148 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11149 self.nic_pinst[nic_op] = new_nic_params
11150 self.nic_pnew[nic_op] = new_filled_nic_params
11151 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11153 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11154 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11155 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11157 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11159 self.warn.append(msg)
11161 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11162 if new_nic_mode == constants.NIC_MODE_ROUTED:
11163 if constants.INIC_IP in nic_dict:
11164 nic_ip = nic_dict[constants.INIC_IP]
11165 else:
11166 nic_ip = old_nic_ip
11167 if nic_ip is None:
11168 raise errors.OpPrereqError("Cannot set the nic ip to None"
11169 " on a routed nic", errors.ECODE_INVAL)
11170 if constants.INIC_MAC in nic_dict:
11171 nic_mac = nic_dict[constants.INIC_MAC]
11172 if nic_mac is None:
11173 raise errors.OpPrereqError("Cannot set the nic mac to None",
11174 errors.ECODE_INVAL)
11175 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11176 # otherwise generate the mac
11177 nic_dict[constants.INIC_MAC] = \
11178 self.cfg.GenerateMAC(self.proc.GetECId())
11179 else:
11180 # or validate/reserve the current one
11181 try:
11182 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11183 except errors.ReservationError:
11184 raise errors.OpPrereqError("MAC address %s already in use"
11185 " in cluster" % nic_mac,
11186 errors.ECODE_NOTUNIQUE)
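# Illustration (hypothetical input): a per-NIC modification such as
# {"bridge": "br0", constants.INIC_MAC: constants.VALUE_GENERATE} is folded
# into NIC_LINK="br0" via update_params_dict above and receives a freshly
# generated MAC, while an explicit MAC already used in the cluster is
# rejected with ECODE_NOTUNIQUE.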
11189 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11190 raise errors.OpPrereqError("Disk operations not supported for"
11191 " diskless instances",
11192 errors.ECODE_INVAL)
11193 for disk_op, _ in self.op.disks:
11194 if disk_op == constants.DDM_REMOVE:
11195 if len(instance.disks) == 1:
11196 raise errors.OpPrereqError("Cannot remove the last disk of"
11197 " an instance", errors.ECODE_INVAL)
11198 _CheckInstanceDown(self, instance, "cannot remove disks")
11200 if (disk_op == constants.DDM_ADD and
11201 len(instance.disks) >= constants.MAX_DISKS):
11202 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11203 " add more" % constants.MAX_DISKS,
11204 errors.ECODE_STATE)
11205 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11207 if disk_op < 0 or disk_op >= len(instance.disks):
11208 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11209 " are 0 to %d" %
11210 (disk_op, len(instance.disks)),
11211 errors.ECODE_INVAL)
11215 def _ConvertPlainToDrbd(self, feedback_fn):
11216 """Converts an instance from plain to drbd.
11219 feedback_fn("Converting template to drbd")
11220 instance = self.instance
11221 pnode = instance.primary_node
11222 snode = self.op.remote_node
11224 # create a fake disk info for _GenerateDiskTemplate
11225 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11226 constants.IDISK_VG: d.logical_id[0]}
11227 for d in instance.disks]
11228 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11229 instance.name, pnode, [snode],
11230 disk_info, None, None, 0, feedback_fn)
11231 info = _GetInstanceInfoText(instance)
11232 feedback_fn("Creating additional volumes...")
11233 # first, create the missing data and meta devices
11234 for disk in new_disks:
11235 # unfortunately this is... not too nice
11236 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11237 info, True)
11238 for child in disk.children:
11239 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11240 # at this stage, all new LVs have been created, we can rename the
11241 # old ones
11242 feedback_fn("Renaming original volumes...")
11243 rename_list = [(o, n.children[0].logical_id)
11244 for (o, n) in zip(instance.disks, new_disks)]
11245 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11246 result.Raise("Failed to rename original LVs")
11248 feedback_fn("Initializing DRBD devices...")
11249 # all child devices are in place, we can now create the DRBD devices
11250 for disk in new_disks:
11251 for node in [pnode, snode]:
11252 f_create = node == pnode
11253 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11255 # at this point, the instance has been modified
11256 instance.disk_template = constants.DT_DRBD8
11257 instance.disks = new_disks
11258 self.cfg.Update(instance, feedback_fn)
11260 # disks are created, waiting for sync
11261 disk_abort = not _WaitForSync(self, instance,
11262 oneshot=not self.op.wait_for_sync)
11264 raise errors.OpExecError("There are some degraded disks for"
11265 " this instance, please cleanup manually")
11267 def _ConvertDrbdToPlain(self, feedback_fn):
11268 """Converts an instance from drbd to plain.
11271 instance = self.instance
11272 assert len(instance.secondary_nodes) == 1
11273 pnode = instance.primary_node
11274 snode = instance.secondary_nodes[0]
11275 feedback_fn("Converting template to plain")
11277 old_disks = instance.disks
11278 new_disks = [d.children[0] for d in old_disks]
11280 # copy over size and mode
11281 for parent, child in zip(old_disks, new_disks):
11282 child.size = parent.size
11283 child.mode = parent.mode
11285 # update instance structure
11286 instance.disks = new_disks
11287 instance.disk_template = constants.DT_PLAIN
11288 self.cfg.Update(instance, feedback_fn)
11290 feedback_fn("Removing volumes on the secondary node...")
11291 for disk in old_disks:
11292 self.cfg.SetDiskID(disk, snode)
11293 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11295 self.LogWarning("Could not remove block device %s on node %s,"
11296 " continuing anyway: %s", disk.iv_name, snode, msg)
11298 feedback_fn("Removing unneeded volumes on the primary node...")
11299 for idx, disk in enumerate(old_disks):
11300 meta = disk.children[1]
11301 self.cfg.SetDiskID(meta, pnode)
11302 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11304 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11305 " continuing anyway: %s", idx, pnode, msg)
11307 def Exec(self, feedback_fn):
11308 """Modifies an instance.
11310 All parameters take effect only at the next restart of the instance.
11313 # Process here the warnings from CheckPrereq, as we don't have a
11314 # feedback_fn there.
11315 for warn in self.warn:
11316 feedback_fn("WARNING: %s" % warn)
11318 result = []
11319 instance = self.instance
11321 for disk_op, disk_dict in self.op.disks:
11322 if disk_op == constants.DDM_REMOVE:
11323 # remove the last disk
11324 device = instance.disks.pop()
11325 device_idx = len(instance.disks)
11326 for node, disk in device.ComputeNodeTree(instance.primary_node):
11327 self.cfg.SetDiskID(disk, node)
11328 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11330 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11331 " continuing anyway", device_idx, node, msg)
11332 result.append(("disk/%d" % device_idx, "remove"))
11333 elif disk_op == constants.DDM_ADD:
11335 if instance.disk_template in (constants.DT_FILE,
11336 constants.DT_SHARED_FILE):
11337 file_driver, file_path = instance.disks[0].logical_id
11338 file_path = os.path.dirname(file_path)
11339 else:
11340 file_driver = file_path = None
11341 disk_idx_base = len(instance.disks)
11342 new_disk = _GenerateDiskTemplate(self,
11343 instance.disk_template,
11344 instance.name, instance.primary_node,
11345 instance.secondary_nodes,
11346 [disk_dict],
11347 file_path,
11348 file_driver,
11349 disk_idx_base, feedback_fn)[0]
11350 instance.disks.append(new_disk)
11351 info = _GetInstanceInfoText(instance)
11353 logging.info("Creating volume %s for instance %s",
11354 new_disk.iv_name, instance.name)
11355 # Note: this needs to be kept in sync with _CreateDisks
11357 for node in instance.all_nodes:
11358 f_create = node == instance.primary_node
11359 try:
11360 _CreateBlockDev(self, node, instance, new_disk,
11361 f_create, info, f_create)
11362 except errors.OpExecError, err:
11363 self.LogWarning("Failed to create volume %s (%s) on"
11364 " node %s: %s",
11365 new_disk.iv_name, new_disk, node, err)
11366 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11367 (new_disk.size, new_disk.mode)))
11368 else:
11369 # change a given disk
11370 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11371 result.append(("disk.mode/%d" % disk_op,
11372 disk_dict[constants.IDISK_MODE]))
11374 if self.op.disk_template:
11375 r_shut = _ShutdownInstanceDisks(self, instance)
11376 if not r_shut:
11377 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11378 " proceed with disk template conversion")
11379 mode = (instance.disk_template, self.op.disk_template)
11380 try:
11381 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11382 except:
11383 self.cfg.ReleaseDRBDMinors(instance.name)
11384 raise
11385 result.append(("disk_template", self.op.disk_template))
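# For reference, the change list built here is a flat sequence of
# (field, value) pairs, e.g. (hypothetical) [("disk/1", "add:size=1024,mode=rw"),
# ("disk_template", "drbd")], and is what Exec eventually returns.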
11388 for nic_op, nic_dict in self.op.nics:
11389 if nic_op == constants.DDM_REMOVE:
11390 # remove the last nic
11391 del instance.nics[-1]
11392 result.append(("nic.%d" % len(instance.nics), "remove"))
11393 elif nic_op == constants.DDM_ADD:
11394 # mac and bridge should be set, by now
11395 mac = nic_dict[constants.INIC_MAC]
11396 ip = nic_dict.get(constants.INIC_IP, None)
11397 nicparams = self.nic_pinst[constants.DDM_ADD]
11398 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11399 instance.nics.append(new_nic)
11400 result.append(("nic.%d" % (len(instance.nics) - 1),
11401 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11402 (new_nic.mac, new_nic.ip,
11403 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11404 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11407 for key in (constants.INIC_MAC, constants.INIC_IP):
11408 if key in nic_dict:
11409 setattr(instance.nics[nic_op], key, nic_dict[key])
11410 if nic_op in self.nic_pinst:
11411 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11412 for key, val in nic_dict.iteritems():
11413 result.append(("nic.%s/%d" % (key, nic_op), val))
11416 if self.op.hvparams:
11417 instance.hvparams = self.hv_inst
11418 for key, val in self.op.hvparams.iteritems():
11419 result.append(("hv/%s" % key, val))
11422 if self.op.beparams:
11423 instance.beparams = self.be_inst
11424 for key, val in self.op.beparams.iteritems():
11425 result.append(("be/%s" % key, val))
11428 if self.op.os_name:
11429 instance.os = self.op.os_name
11432 if self.op.osparams:
11433 instance.osparams = self.os_inst
11434 for key, val in self.op.osparams.iteritems():
11435 result.append(("os/%s" % key, val))
11437 self.cfg.Update(instance, feedback_fn)
11439 return result
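# Dispatch table for template conversions: Exec() looks up the
# (current, requested) pair below, so only plain<->drbd8 conversions are
# defined here; other combinations are expected to be rejected earlier,
# during the prerequisite checks.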
11441 _DISK_CONVERSIONS = {
11442 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11443 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11447 class LUInstanceChangeGroup(LogicalUnit):
11448 HPATH = "instance-change-group"
11449 HTYPE = constants.HTYPE_INSTANCE
11452 def ExpandNames(self):
11453 self.share_locks = _ShareAll()
11454 self.needed_locks = {
11455 locking.LEVEL_NODEGROUP: [],
11456 locking.LEVEL_NODE: [],
11459 self._ExpandAndLockInstance()
11461 if self.op.target_groups:
11462 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11463 self.op.target_groups)
11465 self.req_target_uuids = None
11467 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11469 def DeclareLocks(self, level):
11470 if level == locking.LEVEL_NODEGROUP:
11471 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11473 if self.req_target_uuids:
11474 lock_groups = set(self.req_target_uuids)
11476 # Lock all groups used by instance optimistically; this requires going
11477 # via the node before it's locked, requiring verification later on
11478 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11479 lock_groups.update(instance_groups)
11481 # No target groups, need to lock all of them
11482 lock_groups = locking.ALL_SET
11484 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11486 elif level == locking.LEVEL_NODE:
11487 if self.req_target_uuids:
11488 # Lock all nodes used by instances
11489 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11490 self._LockInstancesNodes()
11492 # Lock all nodes in all potential target groups
11493 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11494 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11495 member_nodes = [node_name
11496 for group in lock_groups
11497 for node_name in self.cfg.GetNodeGroup(group).members]
11498 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11500 # Lock all nodes as all groups are potential targets
11501 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11503 def CheckPrereq(self):
11504 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11505 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11506 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11508 assert (self.req_target_uuids is None or
11509 owned_groups.issuperset(self.req_target_uuids))
11510 assert owned_instances == set([self.op.instance_name])
11512 # Get instance information
11513 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11515 # Check if node groups for locked instance are still correct
11516 assert owned_nodes.issuperset(self.instance.all_nodes), \
11517 ("Instance %s's nodes changed while we kept the lock" %
11518 self.op.instance_name)
11520 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11521 owned_groups)
11523 if self.req_target_uuids:
11524 # User requested specific target groups
11525 self.target_uuids = self.req_target_uuids
11527 # All groups except those used by the instance are potential targets
11528 self.target_uuids = owned_groups - inst_groups
11530 conflicting_groups = self.target_uuids & inst_groups
11531 if conflicting_groups:
11532 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11533 " used by the instance '%s'" %
11534 (utils.CommaJoin(conflicting_groups),
11535 self.op.instance_name),
11536 errors.ECODE_INVAL)
11538 if not self.target_uuids:
11539 raise errors.OpPrereqError("There are no possible target groups",
11540 errors.ECODE_INVAL)
11542 def BuildHooksEnv(self):
11543 """Build hooks env.
11546 assert self.target_uuids
11549 "TARGET_GROUPS": " ".join(self.target_uuids),
11552 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11556 def BuildHooksNodes(self):
11557 """Build hooks nodes.
11560 mn = self.cfg.GetMasterNode()
11561 return ([mn], [mn])
11563 def Exec(self, feedback_fn):
11564 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11566 assert instances == [self.op.instance_name], "Instance not locked"
11568 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11569 instances=instances, target_groups=list(self.target_uuids))
11571 ial.Run(self.op.iallocator)
11573 if not ial.success:
11574 raise errors.OpPrereqError("Can't compute solution for changing group of"
11575 " instance '%s' using iallocator '%s': %s" %
11576 (self.op.instance_name, self.op.iallocator,
11577 ial.info),
11578 errors.ECODE_NORES)
11580 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11582 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11583 " instance '%s'", len(jobs), self.op.instance_name)
11585 return ResultWithJobs(jobs)
11588 class LUBackupQuery(NoHooksLU):
11589 """Query the exports list
11594 def ExpandNames(self):
11595 self.needed_locks = {}
11596 self.share_locks[locking.LEVEL_NODE] = 1
11597 if not self.op.nodes:
11598 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11600 self.needed_locks[locking.LEVEL_NODE] = \
11601 _GetWantedNodes(self, self.op.nodes)
11603 def Exec(self, feedback_fn):
11604 """Compute the list of all the exported system images.
11607 @return: a dictionary with the structure node->(export-list)
11608 where export-list is a list of the instances exported on
11609 that node.
11612 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11613 rpcresult = self.rpc.call_export_list(self.nodes)
11614 result = {}
11615 for node in rpcresult:
11616 if rpcresult[node].fail_msg:
11617 result[node] = False
11618 else:
11619 result[node] = rpcresult[node].payload
11621 return result
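# Shape of the returned mapping (hypothetical names):
# {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}
# where False flags nodes whose export list could not be retrieved.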
11624 class LUBackupPrepare(NoHooksLU):
11625 """Prepares an instance for an export and returns useful information.
11630 def ExpandNames(self):
11631 self._ExpandAndLockInstance()
11633 def CheckPrereq(self):
11634 """Check prerequisites.
11637 instance_name = self.op.instance_name
11639 self.instance = self.cfg.GetInstanceInfo(instance_name)
11640 assert self.instance is not None, \
11641 "Cannot retrieve locked instance %s" % self.op.instance_name
11642 _CheckNodeOnline(self, self.instance.primary_node)
11644 self._cds = _GetClusterDomainSecret()
11646 def Exec(self, feedback_fn):
11647 """Prepares an instance for an export.
11650 instance = self.instance
11652 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11653 salt = utils.GenerateSecret(8)
11655 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11656 result = self.rpc.call_x509_cert_create(instance.primary_node,
11657 constants.RIE_CERT_VALIDITY)
11658 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11660 (name, cert_pem) = result.payload
11662 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11666 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11667 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11669 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11675 class LUBackupExport(LogicalUnit):
11676 """Export an instance to an image in the cluster.
11679 HPATH = "instance-export"
11680 HTYPE = constants.HTYPE_INSTANCE
11683 def CheckArguments(self):
11684 """Check the arguments.
11687 self.x509_key_name = self.op.x509_key_name
11688 self.dest_x509_ca_pem = self.op.destination_x509_ca
11690 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11691 if not self.x509_key_name:
11692 raise errors.OpPrereqError("Missing X509 key name for encryption",
11693 errors.ECODE_INVAL)
11695 if not self.dest_x509_ca_pem:
11696 raise errors.OpPrereqError("Missing destination X509 CA",
11697 errors.ECODE_INVAL)
11699 def ExpandNames(self):
11700 self._ExpandAndLockInstance()
11702 # Lock all nodes for local exports
11703 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11704 # FIXME: lock only instance primary and destination node
11706 # Sad but true, for now we have to lock all nodes, as we don't know where
11707 # the previous export might be, and in this LU we search for it and
11708 # remove it from its current node. In the future we could fix this by:
11709 # - making a tasklet to search (share-lock all), then create the
11710 # new one, then one to remove, after
11711 # - removing the removal operation altogether
11712 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11714 def DeclareLocks(self, level):
11715 """Last minute lock declaration."""
11716 # All nodes are locked anyway, so nothing to do here.
11718 def BuildHooksEnv(self):
11719 """Build hooks env.
11721 This will run on the master, primary node and target node.
11725 "EXPORT_MODE": self.op.mode,
11726 "EXPORT_NODE": self.op.target_node,
11727 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11728 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11729 # TODO: Generic function for boolean env variables
11730 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11733 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11737 def BuildHooksNodes(self):
11738 """Build hooks nodes.
11741 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11743 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11744 nl.append(self.op.target_node)
11748 def CheckPrereq(self):
11749 """Check prerequisites.
11751 This checks that the instance and node names are valid.
11754 instance_name = self.op.instance_name
11756 self.instance = self.cfg.GetInstanceInfo(instance_name)
11757 assert self.instance is not None, \
11758 "Cannot retrieve locked instance %s" % self.op.instance_name
11759 _CheckNodeOnline(self, self.instance.primary_node)
11761 if (self.op.remove_instance and self.instance.admin_up and
11762 not self.op.shutdown):
11763 raise errors.OpPrereqError("Can not remove instance without shutting it"
11764 " down before", errors.ECODE_STATE)
11766 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11767 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11768 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11769 assert self.dst_node is not None
11771 _CheckNodeOnline(self, self.dst_node.name)
11772 _CheckNodeNotDrained(self, self.dst_node.name)
11775 self.dest_disk_info = None
11776 self.dest_x509_ca = None
11778 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11779 self.dst_node = None
11781 if len(self.op.target_node) != len(self.instance.disks):
11782 raise errors.OpPrereqError(("Received destination information for %s"
11783 " disks, but instance %s has %s disks") %
11784 (len(self.op.target_node), instance_name,
11785 len(self.instance.disks)),
11786 errors.ECODE_INVAL)
11788 cds = _GetClusterDomainSecret()
11790 # Check X509 key name
11792 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11793 except (TypeError, ValueError), err:
11794 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11796 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11797 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11798 errors.ECODE_INVAL)
11800 # Load and verify CA
11802 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11803 except OpenSSL.crypto.Error, err:
11804 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11805 (err, ), errors.ECODE_INVAL)
11807 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11808 if errcode is not None:
11809 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11810 (msg, ), errors.ECODE_INVAL)
11812 self.dest_x509_ca = cert
11814 # Verify target information
11816 for idx, disk_data in enumerate(self.op.target_node):
11818 (host, port, magic) = \
11819 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11820 except errors.GenericError, err:
11821 raise errors.OpPrereqError("Target info for disk %s: %s" %
11822 (idx, err), errors.ECODE_INVAL)
11824 disk_info.append((host, port, magic))
11826 assert len(disk_info) == len(self.op.target_node)
11827 self.dest_disk_info = disk_info
11830 raise errors.ProgrammerError("Unhandled export mode %r" %
11831 self.op.mode)
11833 # instance disk type verification
11834 # TODO: Implement export support for file-based disks
11835 for disk in self.instance.disks:
11836 if disk.dev_type == constants.LD_FILE:
11837 raise errors.OpPrereqError("Export not supported for instances with"
11838 " file-based disks", errors.ECODE_INVAL)
11840 def _CleanupExports(self, feedback_fn):
11841 """Removes exports of current instance from all other nodes.
11843 If an instance in a cluster with nodes A..D was exported to node C, its
11844 exports will be removed from the nodes A, B and D.
11847 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11849 nodelist = self.cfg.GetNodeList()
11850 nodelist.remove(self.dst_node.name)
11852 # on one-node clusters nodelist will be empty after the removal
11853 # if we proceed the backup would be removed because OpBackupQuery
11854 # substitutes an empty list with the full cluster node list.
11855 iname = self.instance.name
11857 feedback_fn("Removing old exports for instance %s" % iname)
11858 exportlist = self.rpc.call_export_list(nodelist)
11859 for node in exportlist:
11860 if exportlist[node].fail_msg:
11862 if iname in exportlist[node].payload:
11863 msg = self.rpc.call_export_remove(node, iname).fail_msg
11865 self.LogWarning("Could not remove older export for instance %s"
11866 " on node %s: %s", iname, node, msg)
11868 def Exec(self, feedback_fn):
11869 """Export an instance to an image in the cluster.
11872 assert self.op.mode in constants.EXPORT_MODES
11874 instance = self.instance
11875 src_node = instance.primary_node
11877 if self.op.shutdown:
11878 # shutdown the instance, but not the disks
11879 feedback_fn("Shutting down instance %s" % instance.name)
11880 result = self.rpc.call_instance_shutdown(src_node, instance,
11881 self.op.shutdown_timeout)
11882 # TODO: Maybe ignore failures if ignore_remove_failures is set
11883 result.Raise("Could not shutdown instance %s on"
11884 " node %s" % (instance.name, src_node))
11886 # set the disks ID correctly since call_instance_start needs the
11887 # correct drbd minor to create the symlinks
11888 for disk in instance.disks:
11889 self.cfg.SetDiskID(disk, src_node)
11891 activate_disks = (not instance.admin_up)
11894 # Activate the instance disks if we're exporting a stopped instance
11895 feedback_fn("Activating disks for %s" % instance.name)
11896 _StartInstanceDisks(self, instance, None)
11899 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11902 helper.CreateSnapshots()
11904 if (self.op.shutdown and instance.admin_up and
11905 not self.op.remove_instance):
11906 assert not activate_disks
11907 feedback_fn("Starting instance %s" % instance.name)
11908 result = self.rpc.call_instance_start(src_node,
11909 (instance, None, None), False)
11910 msg = result.fail_msg
11912 feedback_fn("Failed to start instance: %s" % msg)
11913 _ShutdownInstanceDisks(self, instance)
11914 raise errors.OpExecError("Could not start instance: %s" % msg)
11916 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11917 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11918 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11919 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11920 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11922 (key_name, _, _) = self.x509_key_name
11925 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11928 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11929 key_name, dest_ca_pem,
11934 # Check for backwards compatibility
11935 assert len(dresults) == len(instance.disks)
11936 assert compat.all(isinstance(i, bool) for i in dresults), \
11937 "Not all results are boolean: %r" % dresults
11941 feedback_fn("Deactivating disks for %s" % instance.name)
11942 _ShutdownInstanceDisks(self, instance)
11944 if not (compat.all(dresults) and fin_resu):
11947 failures.append("export finalization")
11948 if not compat.all(dresults):
11949 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11951 failures.append("disk export: disk(s) %s" % fdsk)
11953 raise errors.OpExecError("Export failed, errors in %s" %
11954 utils.CommaJoin(failures))
11956 # At this point, the export was successful, we can cleanup/finish
11958 # Remove instance if requested
11959 if self.op.remove_instance:
11960 feedback_fn("Removing instance %s" % instance.name)
11961 _RemoveInstance(self, feedback_fn, instance,
11962 self.op.ignore_remove_failures)
11964 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11965 self._CleanupExports(feedback_fn)
11967 return fin_resu, dresults
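# fin_resu is the overall export finalization status and dresults holds one
# boolean per instance disk, as the assertions above enforce.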
11970 class LUBackupRemove(NoHooksLU):
11971 """Remove exports related to the named instance.
11976 def ExpandNames(self):
11977 self.needed_locks = {}
11978 # We need all nodes to be locked in order for RemoveExport to work, but we
11979 # don't need to lock the instance itself, as nothing will happen to it (and
11980 # we can remove exports also for a removed instance)
11981 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11983 def Exec(self, feedback_fn):
11984 """Remove any export.
11987 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11988 # If the instance was not found we'll try with the name that was passed in.
11989 # This will only work if it was an FQDN, though.
11991 if not instance_name:
11993 instance_name = self.op.instance_name
11995 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11996 exportlist = self.rpc.call_export_list(locked_nodes)
11998 for node in exportlist:
11999 msg = exportlist[node].fail_msg
12001 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12003 if instance_name in exportlist[node].payload:
12005 result = self.rpc.call_export_remove(node, instance_name)
12006 msg = result.fail_msg
12008 logging.error("Could not remove export for instance %s"
12009 " on node %s: %s", instance_name, node, msg)
12011 if fqdn_warn and not found:
12012 feedback_fn("Export not found. If trying to remove an export belonging"
12013 " to a deleted instance please use its Fully Qualified"
12017 class LUGroupAdd(LogicalUnit):
12018 """Logical unit for creating node groups.
12021 HPATH = "group-add"
12022 HTYPE = constants.HTYPE_GROUP
12025 def ExpandNames(self):
12026 # We need the new group's UUID here so that we can create and acquire the
12027 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12028 # that it should not check whether the UUID exists in the configuration.
12029 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12030 self.needed_locks = {}
12031 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12033 def CheckPrereq(self):
12034 """Check prerequisites.
12036 This checks that the given group name is not an existing node group
12041 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12042 except errors.OpPrereqError:
12045 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12046 " node group (UUID: %s)" %
12047 (self.op.group_name, existing_uuid),
12048 errors.ECODE_EXISTS)
12050 if self.op.ndparams:
12051 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12053 def BuildHooksEnv(self):
12054 """Build hooks env.
12058 "GROUP_NAME": self.op.group_name,
12061 def BuildHooksNodes(self):
12062 """Build hooks nodes.
12065 mn = self.cfg.GetMasterNode()
12066 return ([mn], [mn])
12068 def Exec(self, feedback_fn):
12069 """Add the node group to the cluster.
12072 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12073 uuid=self.group_uuid,
12074 alloc_policy=self.op.alloc_policy,
12075 ndparams=self.op.ndparams)
12077 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12078 del self.remove_locks[locking.LEVEL_NODEGROUP]
12081 class LUGroupAssignNodes(NoHooksLU):
12082 """Logical unit for assigning nodes to groups.
12087 def ExpandNames(self):
12088 # These raise errors.OpPrereqError on their own:
12089 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12090 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12092 # We want to lock all the affected nodes and groups. We have readily
12093 # available the list of nodes, and the *destination* group. To gather the
12094 # list of "source" groups, we need to fetch node information later on.
12095 self.needed_locks = {
12096 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12097 locking.LEVEL_NODE: self.op.nodes,
12100 def DeclareLocks(self, level):
12101 if level == locking.LEVEL_NODEGROUP:
12102 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12104 # Try to get all affected nodes' groups without having the group or node
12105 # lock yet. Needs verification later in the code flow.
12106 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12108 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12110 def CheckPrereq(self):
12111 """Check prerequisites.
12114 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12115 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12116 frozenset(self.op.nodes))
12118 expected_locks = (set([self.group_uuid]) |
12119 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12120 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12121 if actual_locks != expected_locks:
12122 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12123 " current groups are '%s', used to be '%s'" %
12124 (utils.CommaJoin(expected_locks),
12125 utils.CommaJoin(actual_locks)))
12127 self.node_data = self.cfg.GetAllNodesInfo()
12128 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12129 instance_data = self.cfg.GetAllInstancesInfo()
12131 if self.group is None:
12132 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12133 (self.op.group_name, self.group_uuid))
12135 (new_splits, previous_splits) = \
12136 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12137 for node in self.op.nodes],
12138 self.node_data, instance_data)
12141 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12143 if not self.op.force:
12144 raise errors.OpExecError("The following instances get split by this"
12145 " change and --force was not given: %s" %
12148 self.LogWarning("This operation will split the following instances: %s",
12151 if previous_splits:
12152 self.LogWarning("In addition, these already-split instances continue"
12153 " to be split across groups: %s",
12154 utils.CommaJoin(utils.NiceSort(previous_splits)))
12156 def Exec(self, feedback_fn):
12157 """Assign nodes to a new group.
12160 for node in self.op.nodes:
12161 self.node_data[node].group = self.group_uuid
12163 # FIXME: Depends on side-effects of modifying the result of
12164 # C{cfg.GetAllNodesInfo}
12166 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12169 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12170 """Check for split instances after a node assignment.
12172 This method considers a series of node assignments as an atomic operation,
12173 and returns information about split instances after applying the set of
12174 changes.
12176 In particular, it returns information about newly split instances, and
12177 instances that were already split, and remain so after the change.
12179 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12180 considered.
12182 @type changes: list of (node_name, new_group_uuid) pairs.
12183 @param changes: list of node assignments to consider.
12184 @param node_data: a dict with data for all nodes
12185 @param instance_data: a dict with all instances to consider
12186 @rtype: a two-tuple
12187 @return: a list of instances that were previously okay and result split as a
12188 consequence of this change, and a list of instances that were previously
12189 split and this change does not fix.
12192 changed_nodes = dict((node, group) for node, group in changes
12193 if node_data[node].group != group)
12195 all_split_instances = set()
12196 previously_split_instances = set()
12198 def InstanceNodes(instance):
12199 return [instance.primary_node] + list(instance.secondary_nodes)
12201 for inst in instance_data.values():
12202 if inst.disk_template not in constants.DTS_INT_MIRROR:
12203 continue
12205 instance_nodes = InstanceNodes(inst)
12207 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12208 previously_split_instances.add(inst.name)
12210 if len(set(changed_nodes.get(node, node_data[node].group)
12211 for node in instance_nodes)) > 1:
12212 all_split_instances.add(inst.name)
12214 return (list(all_split_instances - previously_split_instances),
12215 list(previously_split_instances & all_split_instances))
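# Illustration (hypothetical cluster): a DRBD instance on nodes n1/n2, both
# currently in group G1, becomes newly split when changes contain ("n2", "G2");
# an instance whose nodes already span two groups and are left untouched ends
# up in the second, "previously split" list instead.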
12218 class _GroupQuery(_QueryBase):
12219 FIELDS = query.GROUP_FIELDS
12221 def ExpandNames(self, lu):
12222 lu.needed_locks = {}
12224 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12225 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12228 self.wanted = [name_to_uuid[name]
12229 for name in utils.NiceSort(name_to_uuid.keys())]
12231 # Accept names to be either names or UUIDs.
12234 all_uuid = frozenset(self._all_groups.keys())
12236 for name in self.names:
12237 if name in all_uuid:
12238 self.wanted.append(name)
12239 elif name in name_to_uuid:
12240 self.wanted.append(name_to_uuid[name])
12242 missing.append(name)
12245 raise errors.OpPrereqError("Some groups do not exist: %s" %
12246 utils.CommaJoin(missing),
12247 errors.ECODE_NOENT)
12249 def DeclareLocks(self, lu, level):
12252 def _GetQueryData(self, lu):
12253 """Computes the list of node groups and their attributes.
12256 do_nodes = query.GQ_NODE in self.requested_data
12257 do_instances = query.GQ_INST in self.requested_data
12259 group_to_nodes = None
12260 group_to_instances = None
12262 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12263 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12264 # latter GetAllInstancesInfo() is not enough, for we have to go through
12265 # instance->node. Hence, we will need to process nodes even if we only need
12266 # instance information.
12267 if do_nodes or do_instances:
12268 all_nodes = lu.cfg.GetAllNodesInfo()
12269 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12270 node_to_group = {}
12272 for node in all_nodes.values():
12273 if node.group in group_to_nodes:
12274 group_to_nodes[node.group].append(node.name)
12275 node_to_group[node.name] = node.group
12277 if do_instances:
12278 all_instances = lu.cfg.GetAllInstancesInfo()
12279 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12281 for instance in all_instances.values():
12282 node = instance.primary_node
12283 if node in node_to_group:
12284 group_to_instances[node_to_group[node]].append(instance.name)
12286 if not do_nodes:
12287 # Do not pass on node information if it was not requested.
12288 group_to_nodes = None
12290 return query.GroupQueryData([self._all_groups[uuid]
12291 for uuid in self.wanted],
12292 group_to_nodes, group_to_instances)
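# When requested, the helper maps have the shape (hypothetical)
# group_to_nodes = {group_uuid: ["node1", "node2"]} and
# group_to_instances = {group_uuid: ["inst1"]}, instances being attributed to
# the group of their primary node; otherwise None is passed instead.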
12295 class LUGroupQuery(NoHooksLU):
12296 """Logical unit for querying node groups.
12301 def CheckArguments(self):
12302 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12303 self.op.output_fields, False)
12305 def ExpandNames(self):
12306 self.gq.ExpandNames(self)
12308 def DeclareLocks(self, level):
12309 self.gq.DeclareLocks(self, level)
12311 def Exec(self, feedback_fn):
12312 return self.gq.OldStyleQuery(self)
12315 class LUGroupSetParams(LogicalUnit):
12316 """Modifies the parameters of a node group.
12319 HPATH = "group-modify"
12320 HTYPE = constants.HTYPE_GROUP
12323 def CheckArguments(self):
12326 self.op.alloc_policy,
12329 if all_changes.count(None) == len(all_changes):
12330 raise errors.OpPrereqError("Please pass at least one modification",
12331 errors.ECODE_INVAL)
12333 def ExpandNames(self):
12334 # This raises errors.OpPrereqError on its own:
12335 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12337 self.needed_locks = {
12338 locking.LEVEL_NODEGROUP: [self.group_uuid],
12341 def CheckPrereq(self):
12342 """Check prerequisites.
12345 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12347 if self.group is None:
12348 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12349 (self.op.group_name, self.group_uuid))
12351 if self.op.ndparams:
12352 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12353 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12354 self.new_ndparams = new_ndparams
12356 def BuildHooksEnv(self):
12357 """Build hooks env.
12361 "GROUP_NAME": self.op.group_name,
12362 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12365 def BuildHooksNodes(self):
12366 """Build hooks nodes.
12369 mn = self.cfg.GetMasterNode()
12370 return ([mn], [mn])
12372 def Exec(self, feedback_fn):
12373 """Modifies the node group.
12376 result = []
12378 if self.op.ndparams:
12379 self.group.ndparams = self.new_ndparams
12380 result.append(("ndparams", str(self.group.ndparams)))
12382 if self.op.alloc_policy:
12383 self.group.alloc_policy = self.op.alloc_policy
12385 self.cfg.Update(self.group, feedback_fn)
12387 return result
12389 class LUGroupRemove(LogicalUnit):
12390 HPATH = "group-remove"
12391 HTYPE = constants.HTYPE_GROUP
12394 def ExpandNames(self):
12395 # This raises errors.OpPrereqError on its own:
12396 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12397 self.needed_locks = {
12398 locking.LEVEL_NODEGROUP: [self.group_uuid],
12401 def CheckPrereq(self):
12402 """Check prerequisites.
12404 This checks that the given group name exists as a node group, that is
12405 empty (i.e., contains no nodes), and that it is not the last group of the
12406 cluster.
12409 # Verify that the group is empty.
12410 group_nodes = [node.name
12411 for node in self.cfg.GetAllNodesInfo().values()
12412 if node.group == self.group_uuid]
12414 if group_nodes:
12415 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12416 " nodes: %s" %
12417 (self.op.group_name,
12418 utils.CommaJoin(utils.NiceSort(group_nodes))),
12419 errors.ECODE_STATE)
12421 # Verify the cluster would not be left group-less.
12422 if len(self.cfg.GetNodeGroupList()) == 1:
12423 raise errors.OpPrereqError("Group '%s' is the only group,"
12424 " cannot be removed" %
12425 self.op.group_name,
12426 errors.ECODE_STATE)
12428 def BuildHooksEnv(self):
12429 """Build hooks env.
12433 "GROUP_NAME": self.op.group_name,
12436 def BuildHooksNodes(self):
12437 """Build hooks nodes.
12440 mn = self.cfg.GetMasterNode()
12441 return ([mn], [mn])
12443 def Exec(self, feedback_fn):
12444 """Remove the node group.
12448 self.cfg.RemoveNodeGroup(self.group_uuid)
12449 except errors.ConfigurationError:
12450 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12451 (self.op.group_name, self.group_uuid))
12453 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12456 class LUGroupRename(LogicalUnit):
12457 HPATH = "group-rename"
12458 HTYPE = constants.HTYPE_GROUP
12461 def ExpandNames(self):
12462 # This raises errors.OpPrereqError on its own:
12463 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12465 self.needed_locks = {
12466 locking.LEVEL_NODEGROUP: [self.group_uuid],
12469 def CheckPrereq(self):
12470 """Check prerequisites.
12472 Ensures requested new name is not yet used.
12476 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12477 except errors.OpPrereqError:
12480 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12481 " node group (UUID: %s)" %
12482 (self.op.new_name, new_name_uuid),
12483 errors.ECODE_EXISTS)
12485 def BuildHooksEnv(self):
12486 """Build hooks env.
12490 "OLD_NAME": self.op.group_name,
12491 "NEW_NAME": self.op.new_name,
12494 def BuildHooksNodes(self):
12495 """Build hooks nodes.
12498 mn = self.cfg.GetMasterNode()
12500 all_nodes = self.cfg.GetAllNodesInfo()
12501 all_nodes.pop(mn, None)
12503 run_nodes = [mn]
12504 run_nodes.extend(node.name for node in all_nodes.values()
12505 if node.group == self.group_uuid)
12507 return (run_nodes, run_nodes)
12509 def Exec(self, feedback_fn):
12510 """Rename the node group.
12513 group = self.cfg.GetNodeGroup(self.group_uuid)
12515 if group is None:
12516 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12517 (self.op.group_name, self.group_uuid))
12519 group.name = self.op.new_name
12520 self.cfg.Update(group, feedback_fn)
12522 return self.op.new_name
12525 class LUGroupEvacuate(LogicalUnit):
12526 HPATH = "group-evacuate"
12527 HTYPE = constants.HTYPE_GROUP
12530 def ExpandNames(self):
12531 # This raises errors.OpPrereqError on its own:
12532 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12534 if self.op.target_groups:
12535 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12536 self.op.target_groups)
12538 self.req_target_uuids = []
12540 if self.group_uuid in self.req_target_uuids:
12541 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12542 " as a target group (targets are %s)" %
12543 (self.group_uuid,
12544 utils.CommaJoin(self.req_target_uuids)),
12545 errors.ECODE_INVAL)
12547 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12549 self.share_locks = _ShareAll()
12550 self.needed_locks = {
12551 locking.LEVEL_INSTANCE: [],
12552 locking.LEVEL_NODEGROUP: [],
12553 locking.LEVEL_NODE: [],
12556 def DeclareLocks(self, level):
12557 if level == locking.LEVEL_INSTANCE:
12558 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12560 # Lock instances optimistically, needs verification once node and group
12561 # locks have been acquired
12562 self.needed_locks[locking.LEVEL_INSTANCE] = \
12563 self.cfg.GetNodeGroupInstances(self.group_uuid)
12565 elif level == locking.LEVEL_NODEGROUP:
12566 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12568 if self.req_target_uuids:
12569 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12571 # Lock all groups used by instances optimistically; this requires going
12572 # via the node before it's locked, requiring verification later on
12573 lock_groups.update(group_uuid
12574 for instance_name in
12575 self.owned_locks(locking.LEVEL_INSTANCE)
12576 for group_uuid in
12577 self.cfg.GetInstanceNodeGroups(instance_name))
12579 # No target groups, need to lock all of them
12580 lock_groups = locking.ALL_SET
12582 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12584 elif level == locking.LEVEL_NODE:
12585 # This will only lock the nodes in the group to be evacuated which
12586 # contain actual instances
12587 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12588 self._LockInstancesNodes()
12590 # Lock all nodes in group to be evacuated and target groups
12591 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12592 assert self.group_uuid in owned_groups
12593 member_nodes = [node_name
12594 for group in owned_groups
12595 for node_name in self.cfg.GetNodeGroup(group).members]
12596 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12598 def CheckPrereq(self):
12599 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12600 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12601 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12603 assert owned_groups.issuperset(self.req_target_uuids)
12604 assert self.group_uuid in owned_groups
12606 # Check if locked instances are still correct
12607 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12609 # Get instance information
12610 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12612 # Check if node groups for locked instances are still correct
12613 for instance_name in owned_instances:
12614 inst = self.instances[instance_name]
12615 assert owned_nodes.issuperset(inst.all_nodes), \
12616 "Instance %s's nodes changed while we kept the lock" % instance_name
12618 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12619 owned_groups)
12621 assert self.group_uuid in inst_groups, \
12622 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12624 if self.req_target_uuids:
12625 # User requested specific target groups
12626 self.target_uuids = self.req_target_uuids
12628 # All groups except the one to be evacuated are potential targets
12629 self.target_uuids = [group_uuid for group_uuid in owned_groups
12630 if group_uuid != self.group_uuid]
12632 if not self.target_uuids:
12633 raise errors.OpPrereqError("There are no possible target groups",
12634 errors.ECODE_INVAL)
12636 def BuildHooksEnv(self):
12637 """Build hooks env.
12641 "GROUP_NAME": self.op.group_name,
12642 "TARGET_GROUPS": " ".join(self.target_uuids),
12645 def BuildHooksNodes(self):
12646 """Build hooks nodes.
12649 mn = self.cfg.GetMasterNode()
12651 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12653 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12655 return (run_nodes, run_nodes)
12657 def Exec(self, feedback_fn):
12658 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12660 assert self.group_uuid not in self.target_uuids
12662 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12663 instances=instances, target_groups=self.target_uuids)
12665 ial.Run(self.op.iallocator)
12667 if not ial.success:
12668 raise errors.OpPrereqError("Can't compute group evacuation using"
12669 " iallocator '%s': %s" %
12670 (self.op.iallocator, ial.info),
12671 errors.ECODE_NORES)
12673 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12675 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12676 len(jobs), self.op.group_name)
12678 return ResultWithJobs(jobs)
12681 class TagsLU(NoHooksLU): # pylint: disable=W0223
12682 """Generic tags LU.
12684 This is an abstract class which is the parent of all the other tags LUs.
12687 def ExpandNames(self):
12688 self.group_uuid = None
12689 self.needed_locks = {}
12690 if self.op.kind == constants.TAG_NODE:
12691 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12692 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12693 elif self.op.kind == constants.TAG_INSTANCE:
12694 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12695 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12696 elif self.op.kind == constants.TAG_NODEGROUP:
12697 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12699 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12700 # not possible to acquire the BGL based on opcode parameters)
12702 def CheckPrereq(self):
12703 """Check prerequisites.
12706 if self.op.kind == constants.TAG_CLUSTER:
12707 self.target = self.cfg.GetClusterInfo()
12708 elif self.op.kind == constants.TAG_NODE:
12709 self.target = self.cfg.GetNodeInfo(self.op.name)
12710 elif self.op.kind == constants.TAG_INSTANCE:
12711 self.target = self.cfg.GetInstanceInfo(self.op.name)
12712 elif self.op.kind == constants.TAG_NODEGROUP:
12713 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12715 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12716 str(self.op.kind), errors.ECODE_INVAL)
12719 class LUTagsGet(TagsLU):
12720 """Returns the tags of a given object.
12725 def ExpandNames(self):
12726 TagsLU.ExpandNames(self)
12728 # Share locks as this is only a read operation
12729 self.share_locks = _ShareAll()
12731 def Exec(self, feedback_fn):
12732 """Returns the tag list.
12735 return list(self.target.GetTags())
12738 class LUTagsSearch(NoHooksLU):
12739 """Searches the tags for a given pattern.
12744 def ExpandNames(self):
12745 self.needed_locks = {}
12747 def CheckPrereq(self):
12748 """Check prerequisites.
12750 This checks the pattern passed for validity by compiling it.
12754 self.re = re.compile(self.op.pattern)
12755 except re.error, err:
12756 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12757 (self.op.pattern, err), errors.ECODE_INVAL)
12759 def Exec(self, feedback_fn):
12760 """Returns the tag list.
12764 tgts = [("/cluster", cfg.GetClusterInfo())]
12765 ilist = cfg.GetAllInstancesInfo().values()
12766 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12767 nlist = cfg.GetAllNodesInfo().values()
12768 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12769 tgts.extend(("/nodegroup/%s" % n.name, n)
12770 for n in cfg.GetAllNodeGroupsInfo().values())
12772 for path, target in tgts:
12773 for tag in target.GetTags():
12774 if self.re.search(tag):
12775 results.append((path, tag))
12779 class LUTagsSet(TagsLU):
12780 """Sets a tag on a given object.
12785 def CheckPrereq(self):
12786 """Check prerequisites.
12788 This checks the type and length of the tag name and value.
12791 TagsLU.CheckPrereq(self)
12792 for tag in self.op.tags:
12793 objects.TaggableObject.ValidateTag(tag)
12795 def Exec(self, feedback_fn):
12799 try:
12800 for tag in self.op.tags:
12801 self.target.AddTag(tag)
12802 except errors.TagError, err:
12803 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12804 self.cfg.Update(self.target, feedback_fn)
12807 class LUTagsDel(TagsLU):
12808 """Delete a list of tags from a given object.
12813 def CheckPrereq(self):
12814 """Check prerequisites.
12816 This checks that we have the given tag.
12819 TagsLU.CheckPrereq(self)
12820 for tag in self.op.tags:
12821 objects.TaggableObject.ValidateTag(tag)
12822 del_tags = frozenset(self.op.tags)
12823 cur_tags = self.target.GetTags()
12825 diff_tags = del_tags - cur_tags
12826 if diff_tags:
12827 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12828 raise errors.OpPrereqError("Tag(s) %s not found" %
12829 (utils.CommaJoin(diff_names), ),
12830 errors.ECODE_NOENT)
12832 def Exec(self, feedback_fn):
12833 """Remove the tag from the object.
12836 for tag in self.op.tags:
12837 self.target.RemoveTag(tag)
12838 self.cfg.Update(self.target, feedback_fn)
12841 class LUTestDelay(NoHooksLU):
12842 """Sleep for a specified amount of time.
12844 This LU sleeps on the master and/or nodes for a specified amount of
12850 def ExpandNames(self):
12851 """Expand names and set required locks.
12853 This expands the node list, if any.
12856 self.needed_locks = {}
12857 if self.op.on_nodes:
12858 # _GetWantedNodes can be used here, but is not always appropriate to use
12859 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12860 # more information.
12861 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12862 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12864 def _TestDelay(self):
12865 """Do the actual sleep.
12868 if self.op.on_master:
12869 if not utils.TestDelay(self.op.duration):
12870 raise errors.OpExecError("Error during master delay test")
12871 if self.op.on_nodes:
12872 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12873 for node, node_result in result.items():
12874 node_result.Raise("Failure during rpc call to node %s" % node)
12876 def Exec(self, feedback_fn):
12877 """Execute the test delay opcode, with the wanted repetitions.
12880 if self.op.repeat == 0:
12883 top_value = self.op.repeat - 1
12884 for i in range(self.op.repeat):
12885 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12889 class LUTestJqueue(NoHooksLU):
12890 """Utility LU to test some aspects of the job queue.
12895 # Must be lower than default timeout for WaitForJobChange to see whether it
12896 # notices changed jobs
12897 _CLIENT_CONNECT_TIMEOUT = 20.0
12898 _CLIENT_CONFIRM_TIMEOUT = 60.0
12901 def _NotifyUsingSocket(cls, cb, errcls):
12902 """Opens a Unix socket and waits for another program to connect.
12905 @param cb: Callback to send socket name to client
12906 @type errcls: class
12907 @param errcls: Exception class to use for errors
12910 # Using a temporary directory as there's no easy way to create temporary
12911 # sockets without writing a custom loop around tempfile.mktemp and
12912 # socket.bind
12913 tmpdir = tempfile.mkdtemp()
12915 tmpsock = utils.PathJoin(tmpdir, "sock")
12917 logging.debug("Creating temporary socket at %s", tmpsock)
12918 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12923 # Send details to client
12926 # Wait for client to connect before continuing
12927 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12929 (conn, _) = sock.accept()
12930 except socket.error, err:
12931 raise errcls("Client didn't connect in time (%s)" % err)
12935 # Remove as soon as client is connected
12936 shutil.rmtree(tmpdir)
12938 # Wait for client to close
12941 # pylint: disable=E1101
12942 # Instance of '_socketobject' has no ... member
12943 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12945 except socket.error, err:
12946 raise errcls("Client failed to confirm notification (%s)" % err)
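  # Sketch of the client side (hypothetical code, values assumed): the peer
  # that receives the socket path through the callback is expected to connect
  # within _CLIENT_CONNECT_TIMEOUT and then close the connection within
  # _CLIENT_CONFIRM_TIMEOUT to confirm the notification, roughly:
  #
  #   conn = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   conn.connect(sockname)  # path announced via the callback
  #   conn.close()            # closing confirms receipt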
12950 def _SendNotification(self, test, arg, sockname):
12951 """Sends a notification to the client.
12954 @param test: Test name
12955 @param arg: Test argument (depends on test)
12956 @type sockname: string
12957 @param sockname: Socket path
12960 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12962 def _Notify(self, prereq, test, arg):
12963 """Notifies the client of a test.
12966 @param prereq: Whether this is a prereq-phase test
12968 @param test: Test name
12969 @param arg: Test argument (depends on test)
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg), errcls)
12981 def CheckArguments(self):
12982 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12983 self.expandnames_calls = 0
12985 def ExpandNames(self):
12986 checkargs_calls = getattr(self, "checkargs_calls", 0)
12987 if checkargs_calls < 1:
12988 raise errors.ProgrammerError("CheckArguments was not called")
12990 self.expandnames_calls += 1
12992 if self.op.notify_waitlock:
12993 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12995 self.LogInfo("Expanding names")
12997 # Get lock on master node (just to get a lock, not for a particular reason)
12998 self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
13002 def Exec(self, feedback_fn):
13003 if self.expandnames_calls < 1:
13004 raise errors.ProgrammerError("ExpandNames was not called")
13006 if self.op.notify_exec:
13007 self._Notify(False, constants.JQT_EXEC, None)
13009 self.LogInfo("Executing")
13011 if self.op.log_messages:
13012 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13013 for idx, msg in enumerate(self.op.log_messages):
13014 self.LogInfo("Sending log message %s", idx + 1)
13015 feedback_fn(constants.JQT_MSGPREFIX + msg)
13016 # Report how many test messages have been sent
13017 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")
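# Illustration of the protocol implemented by LUTestJqueue.Exec above, with a
# hypothetical single message: every entry of op.log_messages is sent through
# feedback_fn prefixed with constants.JQT_MSGPREFIX, bracketed by
# JQT_STARTMSG/JQT_LOGMSG notifications carrying the message count:
#
#   self._Notify(False, constants.JQT_STARTMSG, 1)
#   feedback_fn(constants.JQT_MSGPREFIX + "hello")
#   self._Notify(False, constants.JQT_LOGMSG, 1)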
13025 class IAllocator(object):
13026 """IAllocator framework.
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
13038 # pylint: disable=R0902
13039 # lots of instance attributes
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
13046 # init all input fields so that pylint is happy
13048 self.memory = self.disks = self.disk_template = None
13049 self.os = self.tags = self.nics = self.vcpus = None
13050 self.hypervisor = None
13051 self.relocate_from = None
13053 self.instances = None
13054 self.evac_mode = None
13055 self.target_groups = []
13057 self.required_nodes = None
13058 # init result fields
13059 self.success = self.info = self.result = None
    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    keyset = [n for (n, _) in keydata]
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
13079 self._BuildInputData(compat.partial(fn, self), keydata)
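  # A minimal usage sketch (hypothetical names, mirroring the
  # LUTestAllocator.Exec calls near the end of this file): each mode accepts
  # exactly the keyword arguments listed in _MODE_DATA, e.g. for a relocation:
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name="instance1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")  # "hail" is just an example allocator name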
13081 def _ComputeClusterData(self):
13082 """Compute the generic allocator input data.
13084 This is the data that is independent of the actual operation.
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
13093 "cluster_tags": list(cluster_info.GetTags()),
13094 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13095 # we don't have job IDs
13097 ninfo = cfg.GetAllNodesInfo()
13098 iinfo = cfg.GetAllInstancesInfo().values()
13099 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13102 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13104 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13105 hypervisor_name = self.hypervisor
13106 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13107 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13109 hypervisor_name = cluster_info.enabled_hypervisors[0]
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
13117 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13119 config_ndata = self._ComputeBasicNodeData(ninfo)
13120 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13121 i_list, config_ndata)
13122 assert len(data["nodes"]) == len(ninfo), \
13123 "Incomplete node data computed"
13125 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13127 self.in_data = data
13130 def _ComputeNodeGroupData(cfg):
13131 """Compute node groups data.
13134 ng = dict((guuid, {
13135 "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
    return ng
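  # Sketch of the returned structure (hypothetical UUID and values): one entry
  # per node group, e.g.:
  #
  #   {
  #     "e1f2a3b4-0000-0000-0000-000000000000": {
  #       "name": "default",
  #       "alloc_policy": "preferred",
  #     },
  #   }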
13143 def _ComputeBasicNodeData(node_cfg):
13144 """Compute global node data.
13147 @returns: a dict of name: (node dict, node config)
13150 # fill in static (config-based) values
13151 node_results = dict((ninfo.name, {
13152 "tags": list(ninfo.GetTags()),
13153 "primary_ip": ninfo.primary_ip,
13154 "secondary_ip": ninfo.secondary_ip,
13155 "offline": ninfo.offline,
13156 "drained": ninfo.drained,
13157 "master_candidate": ninfo.master_candidate,
13158 "group": ninfo.group,
13159 "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())
13164 return node_results
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
13169 """Compute global node data.
13171 @param node_results: the basic node structures as filled from the config
13174 # make a copy of the current dict
13175 node_results = dict(node_results)
13176 for nname, nresult in node_data.items():
13177 assert nname in node_results, "Missing basic data for node %s" % nname
13178 ninfo = node_cfg[nname]
13180 if not (ninfo.offline or ninfo.drained):
13181 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
13184 remote_info = nresult.payload
13186 for attr in ["memory_total", "memory_free", "memory_dom0",
13187 "vg_size", "vg_free", "cpu_total"]:
13188 if attr not in remote_info:
13189 raise errors.OpExecError("Node '%s' didn't return attribute"
13190 " '%s'" % (nname, attr))
13191 if not isinstance(remote_info[attr], int):
          raise errors.OpExecError("Node '%s' returned invalid value"
                                   " for '%s': %s" %
                                   (nname, attr, remote_info[attr]))
13195 # compute memory used by primary instances
13196 i_p_mem = i_p_up_mem = 0
13197 for iinfo, beinfo in i_list:
13198 if iinfo.primary_node == nname:
13199 i_p_mem += beinfo[constants.BE_MEMORY]
          if iinfo.name not in node_iinfo[nname].payload:
            i_used_mem = 0
          else:
            i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
          i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
          remote_info["memory_free"] -= max(0, i_mem_diff)
          if iinfo.admin_up:
            i_p_up_mem += beinfo[constants.BE_MEMORY]
      # compute memory used by instances
      pnr_dyn = {
        "total_memory": remote_info["memory_total"],
13213 "reserved_memory": remote_info["memory_dom0"],
13214 "free_memory": remote_info["memory_free"],
13215 "total_disk": remote_info["vg_size"],
13216 "free_disk": remote_info["vg_free"],
13217 "total_cpus": remote_info["cpu_total"],
13218 "i_pri_memory": i_p_mem,
13219 "i_pri_up_memory": i_p_up_mem,
13221 pnr_dyn.update(node_results[nname])
13222 node_results[nname] = pnr_dyn
13224 return node_results
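  # Worked example with made-up numbers: the memory accounting above
  # subtracts, for every primary instance, the part of its configured memory
  # that the hypervisor does not currently report as used.  For an instance
  # with BE_MEMORY=1024 reported as using 600:
  #
  #   i_mem_diff = 1024 - 600                      # = 424
  #   remote_info["memory_free"] -= max(0, 424)
  #
  # so "free_memory" already accounts for the instance growing back to its
  # full configured size.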
13227 def _ComputeInstanceData(cluster_info, i_list):
13228 """Compute global instance data.
13232 for iinfo, beinfo in i_list:
13234 for nic in iinfo.nics:
13235 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13239 "mode": filled_params[constants.NIC_MODE],
13240 "link": filled_params[constants.NIC_LINK],
13242 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13243 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13244 nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
13247 "admin_up": iinfo.admin_up,
13248 "vcpus": beinfo[constants.BE_VCPUS],
13249 "memory": beinfo[constants.BE_MEMORY],
13251 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13253 "disks": [{constants.IDISK_SIZE: dsk.size,
13254 constants.IDISK_MODE: dsk.mode}
13255 for dsk in iinfo.disks],
13256 "disk_template": iinfo.disk_template,
13257 "hypervisor": iinfo.hypervisor,
13259 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13261 instance_data[iinfo.name] = pir
13263 return instance_data
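  # Sketch of the returned structure (hypothetical instance, some fields
  # elided): each entry of the dict returned above looks roughly like:
  #
  #   "instance1.example.com": {
  #     "tags": [],
  #     "admin_up": True,
  #     "vcpus": 2,
  #     "memory": 1024,
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd",
  #     "hypervisor": "xen-pvm",
  #     "disk_space_total": ...,   # as computed by _ComputeDiskSize
  #     ...
  #   }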
13265 def _AddNewInstance(self):
13266 """Add new instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.
13275 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13277 if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
13284 "disk_template": self.disk_template,
13287 "vcpus": self.vcpus,
13288 "memory": self.memory,
13289 "disks": self.disks,
13290 "disk_space_total": disk_space,
13292 "required_nodes": self.required_nodes,
13293 "hypervisor": self.hypervisor,
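  # Sketch of the request built above (hypothetical values; the "type" key is
  # added later by _BuildInputData) for IALLOCATOR_MODE_ALLOC:
  #
  #   {
  #     "name": "instance1.example.com",
  #     "disk_template": "drbd",
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_space_total": ...,      # result of _ComputeDiskSize above
  #     "memory": 1024,
  #     "vcpus": 2,
  #     "required_nodes": 2,          # 2 only for internally mirrored templates
  #     "hypervisor": "xen-pvm",
  #     ...
  #   }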
13298 def _AddRelocateInstance(self):
13299 """Add relocate instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.
13308 instance = self.cfg.GetInstanceInfo(self.name)
13309 if instance is None:
13310 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13311 " IAllocator" % self.name)
13313 if instance.disk_template not in constants.DTS_MIRRORED:
13314 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13315 errors.ECODE_INVAL)
13317 if instance.disk_template in constants.DTS_INT_MIRROR and \
13318 len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)
13322 self.required_nodes = 1
13323 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13324 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13328 "disk_space_total": disk_space,
13329 "required_nodes": self.required_nodes,
13330 "relocate_from": self.relocate_from,
13334 def _AddNodeEvacuate(self):
13335 """Get data for node-evacuate requests.
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }
13343 def _AddChangeGroup(self):
    """Get data for change-group requests.
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
13352 def _BuildInputData(self, fn, keydata):
13353 """Build input data structures.
13356 self._ComputeClusterData()
    request = fn()
    request["type"] = self.mode
13360 for keyname, keytype in keydata:
13361 if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
13364 val = request[keyname]
13365 if not keytype(val):
13366 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13367 " validation, value %s, expected"
13368 " type %s" % (keyname, val, keytype))
13369 self.in_data["request"] = request
13371 self.in_text = serializer.Dump(self.in_data)
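  # Illustration: in_text is simply the serialized form of in_data, i.e. the
  # cluster description from _ComputeClusterData plus the per-mode request
  # validated above, roughly:
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": ...,
  #     "nodegroups": {...},
  #     "nodes": {...},
  #     "instances": {...},
  #     "request": {"type": ..., ...},
  #   }
  #
  # The keytype checks are plain ht predicates, e.g. ht.TInt(1024) is True
  # while ht.TInt("1024") is False and would trigger the ProgrammerError.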
13373 _STRING_LIST = ht.TListOf(ht.TString)
13374 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13375 # pylint: disable=E1101
13376 # Class '...' has no 'OP_ID' member
13377 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13378 opcodes.OpInstanceMigrate.OP_ID,
13379 opcodes.OpInstanceReplaceDisks.OP_ID])
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13384 ht.TItems([ht.TNonEmptyString,
13385 ht.TNonEmptyString,
13386 ht.TListOf(ht.TNonEmptyString),
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13390 ht.TItems([ht.TNonEmptyString,
13393 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13394 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
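  # Sketch of a payload accepted by _NEVAC_RESULT (hypothetical data): a
  # node-evacuate/change-group answer is a (moved, failed, jobs) triple, e.g.:
  #
  #   [
  #     [["instance1.example.com", "group1", ["node3.example.com"]]],  # moved
  #     [],                                                            # failed
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],                     # jobs
  #   ]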
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
13401 ("memory", ht.TInt),
13402 ("disks", ht.TListOf(ht.TDict)),
13403 ("disk_template", ht.TString),
13404 ("os", ht.TString),
13405 ("tags", _STRING_LIST),
13406 ("nics", ht.TListOf(ht.TDict)),
13407 ("vcpus", ht.TInt),
13408 ("hypervisor", ht.TString),
13410 constants.IALLOCATOR_MODE_RELOC:
13411 (_AddRelocateInstance,
13412 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13414 constants.IALLOCATOR_MODE_NODE_EVAC:
13415 (_AddNodeEvacuate, [
13416 ("instances", _STRING_LIST),
13417 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13419 constants.IALLOCATOR_MODE_CHG_GROUP:
13420 (_AddChangeGroup, [
13421 ("instances", _STRING_LIST),
13422 ("target_groups", _STRING_LIST),
13426 def Run(self, name, validate=True, call_fn=None):
13427 """Run an instance allocator and return the results.
13430 if call_fn is None:
13431 call_fn = self.rpc.call_iallocator_runner
13433 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13434 result.Raise("Failure while running the iallocator script")
13436 self.out_text = result.payload
    if validate:
      self._ValidateResult()
13440 def _ValidateResult(self):
13441 """Process the allocator results.
    This will process and, if successful, save the result in
    self.out_data and the other parameters.
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13452 if not isinstance(rdict, dict):
13453 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
13456 if "nodes" in rdict and "result" not in rdict:
13457 rdict["result"] = rdict["nodes"]
13460 for key in "success", "info", "result":
13461 if key not in rdict:
13462 raise errors.OpExecError("Can't parse iallocator results:"
13463 " missing key '%s'" % key)
13464 setattr(self, key, rdict[key])
13466 if not self._result_check(self.result):
13467 raise errors.OpExecError("Iallocator returned invalid result,"
13468 " expected %s, got %s" %
13469 (self._result_check, self.result),
13470 errors.ECODE_INVAL)
13472 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13473 assert self.relocate_from is not None
13474 assert self.required_nodes == 1
13476 node2group = dict((name, ndata["group"])
13477 for (name, ndata) in self.in_data["nodes"].items())
13479 fn = compat.partial(self._NodesToGroups, node2group,
13480 self.in_data["nodegroups"])
13482 instance = self.cfg.GetInstanceInfo(self.name)
13483 request_groups = fn(self.relocate_from + [instance.primary_node])
13484 result_groups = fn(rdict["result"] + [instance.primary_node])
13486 if self.success and not set(result_groups).issubset(request_groups):
13487 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13488 " differ from original groups (%s)" %
13489 (utils.CommaJoin(result_groups),
13490 utils.CommaJoin(request_groups)))
13492 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13493 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13495 self.out_data = rdict
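  # Sketch of a minimal valid payload (hypothetical values): for
  # _ValidateResult to succeed the script must return a JSON object with the
  # three mandatory keys, e.g. for an allocation request:
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}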
13498 def _NodesToGroups(node2group, groups, nodes):
13499 """Returns a list of unique group names for a list of nodes.
13501 @type node2group: dict
13502 @param node2group: Map from node name to group UUID
13504 @param groups: Group information
13506 @param nodes: Node names
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)
    return sorted(result)
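  # Example with made-up data: _NodesToGroups collapses node names into their
  # unique group names, falling back to the UUID for groups that cannot be
  # resolved and silently skipping unknown nodes:
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "node3", "nodeX"])
  #   # -> ["default", "uuid-b"]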
13531 class LUTestAllocator(NoHooksLU):
13532 """Run allocator tests.
  This LU runs the allocator tests.
13537 def CheckPrereq(self):
13538 """Check prerequisites.
    This checks the opcode parameters depending on the requested direction
    and mode.
13543 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13544 for attr in ["memory", "disks", "disk_template",
13545 "os", "tags", "nics", "vcpus"]:
13546 if not hasattr(self.op, attr):
13547 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13548 attr, errors.ECODE_INVAL)
13549 iname = self.cfg.ExpandInstanceName(self.op.name)
13550 if iname is not None:
13551 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13552 iname, errors.ECODE_EXISTS)
13553 if not isinstance(self.op.nics, list):
13554 raise errors.OpPrereqError("Invalid parameter 'nics'",
13555 errors.ECODE_INVAL)
13556 if not isinstance(self.op.disks, list):
13557 raise errors.OpPrereqError("Invalid parameter 'disks'",
13558 errors.ECODE_INVAL)
13559 for row in self.op.disks:
13560 if (not isinstance(row, dict) or
13561 constants.IDISK_SIZE not in row or
13562 not isinstance(row[constants.IDISK_SIZE], int) or
13563 constants.IDISK_MODE not in row or
13564 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13565 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13566 " parameter", errors.ECODE_INVAL)
13567 if self.op.hypervisor is None:
13568 self.op.hypervisor = self.cfg.GetHypervisorType()
13569 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13570 fname = _ExpandInstanceName(self.cfg, self.op.name)
13571 self.op.name = fname
13572 self.relocate_from = \
13573 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13574 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13575 constants.IALLOCATOR_MODE_NODE_EVAC):
13576 if not self.op.instances:
13577 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13578 self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)
13583 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13584 if self.op.allocator is None:
13585 raise errors.OpPrereqError("Missing allocator name",
13586 errors.ECODE_INVAL)
13587 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13588 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13589 self.op.direction, errors.ECODE_INVAL)
13591 def Exec(self, feedback_fn):
13592 """Run the allocator test.
13595 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13596 ial = IAllocator(self.cfg, self.rpc,
13599 memory=self.op.memory,
13600 disks=self.op.disks,
13601 disk_template=self.op.disk_template,
13605 vcpus=self.op.vcpus,
13606 hypervisor=self.op.hypervisor,
13608 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13609 ial = IAllocator(self.cfg, self.rpc,
13612 relocate_from=list(self.relocate_from),
13614 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13615 ial = IAllocator(self.cfg, self.rpc,
13617 instances=self.op.instances,
13618 target_groups=self.op.target_groups)
13619 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13620 ial = IAllocator(self.cfg, self.rpc,
13622 instances=self.op.instances,
13623 evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)
13628 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13629 result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }
13644 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13647 def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
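# Illustration: callers resolve a query backend by resource name, e.g.:
#
#   impl = _GetQueryImplementation(constants.QR_INSTANCE)  # -> _InstanceQuery
#
# Unknown names raise OpPrereqError with ECODE_INVAL.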