4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
62 import ganeti.masterd.instance # pylint: disable-msg=W0611
65 def _SupportsOob(cfg, node):
66 """Tells if node supports OOB.
68 @type cfg: L{config.ConfigWriter}
69 @param cfg: The cluster configuration
70 @type node: L{objects.Node}
72 @return: The OOB script if supported or an empty string otherwise
75 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
134 self.context = context
136 # Dicts used to declare locking needs to mcpu
137 self.needed_locks = None
138 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
140 self.remove_locks = {}
141 # Used to force good behavior when calling helper functions
142 self.recalculate_locks = {}
144 self.Log = processor.Log # pylint: disable-msg=C0103
145 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148 # support for dry-run
149 self.dry_run_result = None
150 # support for generic debug attribute
151 if (not hasattr(self.op, "debug_level") or
152 not isinstance(self.op.debug_level, int)):
153 self.op.debug_level = 0
158 # Validate opcode parameters and set defaults
159 self.op.Validate(True)
161 self.CheckArguments()
163 def CheckArguments(self):
164 """Check syntactic validity for the opcode arguments.
166 This method is for doing a simple syntactic check and ensure
167 validity of opcode parameters, without any cluster-related
168 checks. While the same can be accomplished in ExpandNames and/or
169 CheckPrereq, doing these separate is better because:
171 - ExpandNames is left as as purely a lock-related function
172 - CheckPrereq is run after we have acquired locks (and possible
175 The function is allowed to change the self.op attribute so that
176 later methods can no longer worry about missing parameters.
181 def ExpandNames(self):
182 """Expand names for this LU.
184 This method is called before starting to execute the opcode, and it should
185 update all the parameters of the opcode to their canonical form (e.g. a
186 short node name must be fully expanded after this method has successfully
187 completed). This way locking, hooks, logging, etc. can work correctly.
189 LUs which implement this method must also populate the self.needed_locks
190 member, as a dict with lock levels as keys, and a list of needed lock names
193 - use an empty dict if you don't need any lock
194 - if you don't need any lock at a particular level omit that level
195 - don't put anything for the BGL level
196 - if you want all locks at a level use locking.ALL_SET as a value
198 If you need to share locks (rather than acquire them exclusively) at one
199 level you can modify self.share_locks, setting a true value (usually 1) for
200 that level. By default locks are not shared.
202 This function can also define a list of tasklets, which then will be
203 executed in order instead of the usual LU-level CheckPrereq and Exec
204 functions, if those are not defined by the LU.
208 # Acquire all nodes and one instance
209 self.needed_locks = {
210 locking.LEVEL_NODE: locking.ALL_SET,
211 locking.LEVEL_INSTANCE: ['instance1.example.com'],
213 # Acquire just two nodes
214 self.needed_locks = {
215 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
218 self.needed_locks = {} # No, you can't leave it to the default value None
221 # The implementation of this method is mandatory only if the new LU is
222 # concurrent, so that old LUs don't need to be changed all at the same
225 self.needed_locks = {} # Exclusive LUs don't need locks.
227 raise NotImplementedError
229 def DeclareLocks(self, level):
230 """Declare LU locking needs for a level
232 While most LUs can just declare their locking needs at ExpandNames time,
233 sometimes there's the need to calculate some locks after having acquired
234 the ones before. This function is called just before acquiring locks at a
235 particular level, but after acquiring the ones at lower levels, and permits
236 such calculations. It can be used to modify self.needed_locks, and by
237 default it does nothing.
239 This function is only called if you have something already set in
240 self.needed_locks for the level.
242 @param level: Locking level which is going to be locked
243 @type level: member of ganeti.locking.LEVELS
247 def CheckPrereq(self):
248 """Check prerequisites for this LU.
250 This method should check that the prerequisites for the execution
251 of this LU are fulfilled. It can do internode communication, but
252 it should be idempotent - no cluster or system changes are
255 The method should raise errors.OpPrereqError in case something is
256 not fulfilled. Its return value is ignored.
258 This method should also update all the parameters of the opcode to
259 their canonical form if it hasn't been done by ExpandNames before.
262 if self.tasklets is not None:
263 for (idx, tl) in enumerate(self.tasklets):
264 logging.debug("Checking prerequisites for tasklet %s/%s",
265 idx + 1, len(self.tasklets))
270 def Exec(self, feedback_fn):
273 This method should implement the actual work. It should raise
274 errors.OpExecError for failures that are somewhat dealt with in
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
283 raise NotImplementedError
285 def BuildHooksEnv(self):
286 """Build hooks environment for this LU.
289 @return: Dictionary containing the environment that will be used for
290 running the hooks for this LU. The keys of the dict must not be prefixed
291 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
292 will extend the environment with additional variables. If no environment
293 should be defined, an empty dictionary should be returned (not C{None}).
294 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
298 raise NotImplementedError
300 def BuildHooksNodes(self):
301 """Build list of nodes to run LU's hooks.
303 @rtype: tuple; (list, list)
304 @return: Tuple containing a list of node names on which the hook
305 should run before the execution and a list of node names on which the
306 hook should run after the execution. No nodes should be returned as an
307 empty list (and not None).
308 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
312 raise NotImplementedError
314 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
315 """Notify the LU about the results of its hooks.
317 This method is called every time a hooks phase is executed, and notifies
318 the Logical Unit about the hooks' result. The LU can then use it to alter
319 its result based on the hooks. By default the method does nothing and the
320 previous result is passed back unchanged but any LU can define it if it
321 wants to use the local cluster hook-scripts somehow.
323 @param phase: one of L{constants.HOOKS_PHASE_POST} or
324 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
325 @param hook_results: the results of the multi-node hooks rpc call
326 @param feedback_fn: function used send feedback back to the caller
327 @param lu_result: the previous Exec result this LU had, or None
329 @return: the new Exec result, based on the previous result
333 # API must be kept, thus we ignore the unused argument and could
334 # be a function warnings
335 # pylint: disable-msg=W0613,R0201
338 def _ExpandAndLockInstance(self):
339 """Helper function to expand and lock an instance.
341 Many LUs that work on an instance take its name in self.op.instance_name
342 and need to expand it and then declare the expanded name for locking. This
343 function does it, and then updates self.op.instance_name to the expanded
344 name. It also initializes needed_locks as a dict, if this hasn't been done
348 if self.needed_locks is None:
349 self.needed_locks = {}
351 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
352 "_ExpandAndLockInstance called with instance-level locks set"
353 self.op.instance_name = _ExpandInstanceName(self.cfg,
354 self.op.instance_name)
355 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
357 def _LockInstancesNodes(self, primary_only=False):
358 """Helper function to declare instances' nodes for locking.
360 This function should be called after locking one or more instances to lock
361 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
362 with all primary or secondary nodes for instances already locked and
363 present in self.needed_locks[locking.LEVEL_INSTANCE].
365 It should be called from DeclareLocks, and for safety only works if
366 self.recalculate_locks[locking.LEVEL_NODE] is set.
368 In the future it may grow parameters to just lock some instance's nodes, or
369 to just lock primaries or secondary nodes, if needed.
371 If should be called in DeclareLocks in a way similar to::
373 if level == locking.LEVEL_NODE:
374 self._LockInstancesNodes()
376 @type primary_only: boolean
377 @param primary_only: only lock primary nodes of locked instances
380 assert locking.LEVEL_NODE in self.recalculate_locks, \
381 "_LockInstancesNodes helper function called with no nodes to recalculate"
383 # TODO: check if we're really been called with the instance locks held
385 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
386 # future we might want to have different behaviors depending on the value
387 # of self.recalculate_locks[locking.LEVEL_NODE]
389 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
390 instance = self.context.cfg.GetInstanceInfo(instance_name)
391 wanted_nodes.append(instance.primary_node)
393 wanted_nodes.extend(instance.secondary_nodes)
395 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
396 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
397 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
398 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
400 del self.recalculate_locks[locking.LEVEL_NODE]
403 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
404 """Simple LU which runs no hooks.
406 This LU is intended as a parent for other LogicalUnits which will
407 run no hooks, in order to reduce duplicate code.
413 def BuildHooksEnv(self):
414 """Empty BuildHooksEnv for NoHooksLu.
416 This just raises an error.
419 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
421 def BuildHooksNodes(self):
422 """Empty BuildHooksNodes for NoHooksLU.
425 raise AssertionError("BuildHooksNodes called for NoHooksLU")
429 """Tasklet base class.
431 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432 they can mix legacy code with tasklets. Locking needs to be done in the LU,
433 tasklets know nothing about locks.
435 Subclasses must follow these rules:
436 - Implement CheckPrereq
440 def __init__(self, lu):
447 def CheckPrereq(self):
448 """Check prerequisites for this tasklets.
450 This method should check whether the prerequisites for the execution of
451 this tasklet are fulfilled. It can do internode communication, but it
452 should be idempotent - no cluster or system changes are allowed.
454 The method should raise errors.OpPrereqError in case something is not
455 fulfilled. Its return value is ignored.
457 This method should also update all parameters to their canonical form if it
458 hasn't been done before.
463 def Exec(self, feedback_fn):
464 """Execute the tasklet.
466 This method should implement the actual work. It should raise
467 errors.OpExecError for failures that are somewhat dealt with in code, or
471 raise NotImplementedError
475 """Base for query utility classes.
478 #: Attribute holding field definitions
481 def __init__(self, filter_, fields, use_locking):
482 """Initializes this class.
485 self.use_locking = use_locking
487 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
489 self.requested_data = self.query.RequestedData()
490 self.names = self.query.RequestedNames()
492 # Sort only if no names were requested
493 self.sort_by_name = not self.names
495 self.do_locking = None
498 def _GetNames(self, lu, all_names, lock_level):
499 """Helper function to determine names asked for in the query.
503 names = lu.glm.list_owned(lock_level)
507 if self.wanted == locking.ALL_SET:
508 assert not self.names
509 # caller didn't specify names, so ordering is not important
510 return utils.NiceSort(names)
512 # caller specified names and we must keep the same order
514 assert not self.do_locking or lu.glm.is_owned(lock_level)
516 missing = set(self.wanted).difference(names)
518 raise errors.OpExecError("Some items were removed before retrieving"
519 " their data: %s" % missing)
521 # Return expanded names
524 def ExpandNames(self, lu):
525 """Expand names for this query.
527 See L{LogicalUnit.ExpandNames}.
530 raise NotImplementedError()
532 def DeclareLocks(self, lu, level):
533 """Declare locks for this query.
535 See L{LogicalUnit.DeclareLocks}.
538 raise NotImplementedError()
540 def _GetQueryData(self, lu):
541 """Collects all data for this query.
543 @return: Query data object
546 raise NotImplementedError()
548 def NewStyleQuery(self, lu):
549 """Collect data and execute query.
552 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553 sort_by_name=self.sort_by_name)
555 def OldStyleQuery(self, lu):
556 """Collect data and execute query.
559 return self.query.OldStyleQuery(self._GetQueryData(lu),
560 sort_by_name=self.sort_by_name)
563 def _GetWantedNodes(lu, nodes):
564 """Returns list of checked and expanded node names.
566 @type lu: L{LogicalUnit}
567 @param lu: the logical unit on whose behalf we execute
569 @param nodes: list of node names or None for all nodes
571 @return: the list of nodes, sorted
572 @raise errors.ProgrammerError: if the nodes parameter is wrong type
576 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
578 return utils.NiceSort(lu.cfg.GetNodeList())
581 def _GetWantedInstances(lu, instances):
582 """Returns list of checked and expanded instance names.
584 @type lu: L{LogicalUnit}
585 @param lu: the logical unit on whose behalf we execute
586 @type instances: list
587 @param instances: list of instance names or None for all instances
589 @return: the list of instances, sorted
590 @raise errors.OpPrereqError: if the instances parameter is wrong type
591 @raise errors.OpPrereqError: if any of the passed instances is not found
595 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
597 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
601 def _GetUpdatedParams(old_params, update_dict,
602 use_default=True, use_none=False):
603 """Return the new version of a parameter dictionary.
605 @type old_params: dict
606 @param old_params: old parameters
607 @type update_dict: dict
608 @param update_dict: dict containing new parameter values, or
609 constants.VALUE_DEFAULT to reset the parameter to its default
611 @param use_default: boolean
612 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
613 values as 'to be deleted' values
614 @param use_none: boolean
615 @type use_none: whether to recognise C{None} values as 'to be
618 @return: the new parameter dictionary
621 params_copy = copy.deepcopy(old_params)
622 for key, val in update_dict.iteritems():
623 if ((use_default and val == constants.VALUE_DEFAULT) or
624 (use_none and val is None)):
630 params_copy[key] = val
634 def _ReleaseLocks(lu, level, names=None, keep=None):
635 """Releases locks owned by an LU.
637 @type lu: L{LogicalUnit}
638 @param level: Lock level
639 @type names: list or None
640 @param names: Names of locks to release
641 @type keep: list or None
642 @param keep: Names of locks to retain
645 assert not (keep is not None and names is not None), \
646 "Only one of the 'names' and the 'keep' parameters can be given"
648 if names is not None:
649 should_release = names.__contains__
651 should_release = lambda name: name not in keep
653 should_release = None
659 # Determine which locks to release
660 for name in lu.glm.list_owned(level):
661 if should_release(name):
666 assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
668 # Release just some locks
669 lu.glm.release(level, names=release)
671 assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
674 lu.glm.release(level)
676 assert not lu.glm.is_owned(level), "No locks should be owned"
679 def _RunPostHook(lu, node_name):
680 """Runs the post-hook for an opcode on a single node.
683 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
685 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
687 # pylint: disable-msg=W0702
688 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
691 def _CheckOutputFields(static, dynamic, selected):
692 """Checks whether all selected fields are valid.
694 @type static: L{utils.FieldSet}
695 @param static: static fields set
696 @type dynamic: L{utils.FieldSet}
697 @param dynamic: dynamic fields set
704 delta = f.NonMatching(selected)
706 raise errors.OpPrereqError("Unknown output fields selected: %s"
707 % ",".join(delta), errors.ECODE_INVAL)
710 def _CheckGlobalHvParams(params):
711 """Validates that given hypervisor params are not global ones.
713 This will ensure that instances don't get customised versions of
717 used_globals = constants.HVC_GLOBALS.intersection(params)
719 msg = ("The following hypervisor parameters are global and cannot"
720 " be customized at instance level, please modify them at"
721 " cluster level: %s" % utils.CommaJoin(used_globals))
722 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
725 def _CheckNodeOnline(lu, node, msg=None):
726 """Ensure that a given node is online.
728 @param lu: the LU on behalf of which we make the check
729 @param node: the node to check
730 @param msg: if passed, should be a message to replace the default one
731 @raise errors.OpPrereqError: if the node is offline
735 msg = "Can't use offline node"
736 if lu.cfg.GetNodeInfo(node).offline:
737 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
740 def _CheckNodeNotDrained(lu, node):
741 """Ensure that a given node is not drained.
743 @param lu: the LU on behalf of which we make the check
744 @param node: the node to check
745 @raise errors.OpPrereqError: if the node is drained
748 if lu.cfg.GetNodeInfo(node).drained:
749 raise errors.OpPrereqError("Can't use drained node %s" % node,
753 def _CheckNodeVmCapable(lu, node):
754 """Ensure that a given node is vm capable.
756 @param lu: the LU on behalf of which we make the check
757 @param node: the node to check
758 @raise errors.OpPrereqError: if the node is not vm capable
761 if not lu.cfg.GetNodeInfo(node).vm_capable:
762 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
766 def _CheckNodeHasOS(lu, node, os_name, force_variant):
767 """Ensure that a node supports a given OS.
769 @param lu: the LU on behalf of which we make the check
770 @param node: the node to check
771 @param os_name: the OS to query about
772 @param force_variant: whether to ignore variant errors
773 @raise errors.OpPrereqError: if the node is not supporting the OS
776 result = lu.rpc.call_os_get(node, os_name)
777 result.Raise("OS '%s' not in supported OS list for node %s" %
779 prereq=True, ecode=errors.ECODE_INVAL)
780 if not force_variant:
781 _CheckOSVariant(result.payload, os_name)
784 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
785 """Ensure that a node has the given secondary ip.
787 @type lu: L{LogicalUnit}
788 @param lu: the LU on behalf of which we make the check
790 @param node: the node to check
791 @type secondary_ip: string
792 @param secondary_ip: the ip to check
793 @type prereq: boolean
794 @param prereq: whether to throw a prerequisite or an execute error
795 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
796 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
799 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
800 result.Raise("Failure checking secondary ip on node %s" % node,
801 prereq=prereq, ecode=errors.ECODE_ENVIRON)
802 if not result.payload:
803 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
804 " please fix and re-run this command" % secondary_ip)
806 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
808 raise errors.OpExecError(msg)
811 def _GetClusterDomainSecret():
812 """Reads the cluster domain secret.
815 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
819 def _CheckInstanceDown(lu, instance, reason):
820 """Ensure that an instance is not running."""
821 if instance.admin_up:
822 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
823 (instance.name, reason), errors.ECODE_STATE)
825 pnode = instance.primary_node
826 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
827 ins_l.Raise("Can't contact node %s for instance information" % pnode,
828 prereq=True, ecode=errors.ECODE_ENVIRON)
830 if instance.name in ins_l.payload:
831 raise errors.OpPrereqError("Instance %s is running, %s" %
832 (instance.name, reason), errors.ECODE_STATE)
835 def _ExpandItemName(fn, name, kind):
836 """Expand an item name.
838 @param fn: the function to use for expansion
839 @param name: requested item name
840 @param kind: text description ('Node' or 'Instance')
841 @return: the resolved (full) name
842 @raise errors.OpPrereqError: if the item is not found
846 if full_name is None:
847 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
852 def _ExpandNodeName(cfg, name):
853 """Wrapper over L{_ExpandItemName} for nodes."""
854 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
857 def _ExpandInstanceName(cfg, name):
858 """Wrapper over L{_ExpandItemName} for instance."""
859 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
862 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
863 memory, vcpus, nics, disk_template, disks,
864 bep, hvp, hypervisor_name, tags):
865 """Builds instance related env variables for hooks
867 This builds the hook environment from individual variables.
870 @param name: the name of the instance
871 @type primary_node: string
872 @param primary_node: the name of the instance's primary node
873 @type secondary_nodes: list
874 @param secondary_nodes: list of secondary nodes as strings
875 @type os_type: string
876 @param os_type: the name of the instance's OS
877 @type status: boolean
878 @param status: the should_run status of the instance
880 @param memory: the memory size of the instance
882 @param vcpus: the count of VCPUs the instance has
884 @param nics: list of tuples (ip, mac, mode, link) representing
885 the NICs the instance has
886 @type disk_template: string
887 @param disk_template: the disk template of the instance
889 @param disks: the list of (size, mode) pairs
891 @param bep: the backend parameters for the instance
893 @param hvp: the hypervisor parameters for the instance
894 @type hypervisor_name: string
895 @param hypervisor_name: the hypervisor for the instance
897 @param tags: list of instance tags as strings
899 @return: the hook environment for this instance
908 "INSTANCE_NAME": name,
909 "INSTANCE_PRIMARY": primary_node,
910 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
911 "INSTANCE_OS_TYPE": os_type,
912 "INSTANCE_STATUS": str_status,
913 "INSTANCE_MEMORY": memory,
914 "INSTANCE_VCPUS": vcpus,
915 "INSTANCE_DISK_TEMPLATE": disk_template,
916 "INSTANCE_HYPERVISOR": hypervisor_name,
920 nic_count = len(nics)
921 for idx, (ip, mac, mode, link) in enumerate(nics):
924 env["INSTANCE_NIC%d_IP" % idx] = ip
925 env["INSTANCE_NIC%d_MAC" % idx] = mac
926 env["INSTANCE_NIC%d_MODE" % idx] = mode
927 env["INSTANCE_NIC%d_LINK" % idx] = link
928 if mode == constants.NIC_MODE_BRIDGED:
929 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
933 env["INSTANCE_NIC_COUNT"] = nic_count
936 disk_count = len(disks)
937 for idx, (size, mode) in enumerate(disks):
938 env["INSTANCE_DISK%d_SIZE" % idx] = size
939 env["INSTANCE_DISK%d_MODE" % idx] = mode
943 env["INSTANCE_DISK_COUNT"] = disk_count
948 env["INSTANCE_TAGS"] = " ".join(tags)
950 for source, kind in [(bep, "BE"), (hvp, "HV")]:
951 for key, value in source.items():
952 env["INSTANCE_%s_%s" % (kind, key)] = value
957 def _NICListToTuple(lu, nics):
958 """Build a list of nic information tuples.
960 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
961 value in LUInstanceQueryData.
963 @type lu: L{LogicalUnit}
964 @param lu: the logical unit on whose behalf we execute
965 @type nics: list of L{objects.NIC}
966 @param nics: list of nics to convert to hooks tuples
970 cluster = lu.cfg.GetClusterInfo()
974 filled_params = cluster.SimpleFillNIC(nic.nicparams)
975 mode = filled_params[constants.NIC_MODE]
976 link = filled_params[constants.NIC_LINK]
977 hooks_nics.append((ip, mac, mode, link))
981 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
982 """Builds instance related env variables for hooks from an object.
984 @type lu: L{LogicalUnit}
985 @param lu: the logical unit on whose behalf we execute
986 @type instance: L{objects.Instance}
987 @param instance: the instance for which we should build the
990 @param override: dictionary with key/values that will override
993 @return: the hook environment dictionary
996 cluster = lu.cfg.GetClusterInfo()
997 bep = cluster.FillBE(instance)
998 hvp = cluster.FillHV(instance)
1000 'name': instance.name,
1001 'primary_node': instance.primary_node,
1002 'secondary_nodes': instance.secondary_nodes,
1003 'os_type': instance.os,
1004 'status': instance.admin_up,
1005 'memory': bep[constants.BE_MEMORY],
1006 'vcpus': bep[constants.BE_VCPUS],
1007 'nics': _NICListToTuple(lu, instance.nics),
1008 'disk_template': instance.disk_template,
1009 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1012 'hypervisor_name': instance.hypervisor,
1013 'tags': instance.tags,
1016 args.update(override)
1017 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1020 def _AdjustCandidatePool(lu, exceptions):
1021 """Adjust the candidate pool after node operations.
1024 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1026 lu.LogInfo("Promoted nodes to master candidate role: %s",
1027 utils.CommaJoin(node.name for node in mod_list))
1028 for name in mod_list:
1029 lu.context.ReaddNode(name)
1030 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1032 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1036 def _DecideSelfPromotion(lu, exceptions=None):
1037 """Decide whether I should promote myself as a master candidate.
1040 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1041 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042 # the new node will increase mc_max with one, so:
1043 mc_should = min(mc_should + 1, cp_size)
1044 return mc_now < mc_should
1047 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1048 """Check that the brigdes needed by a list of nics exist.
1051 cluster = lu.cfg.GetClusterInfo()
1052 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1053 brlist = [params[constants.NIC_LINK] for params in paramslist
1054 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1056 result = lu.rpc.call_bridges_exist(target_node, brlist)
1057 result.Raise("Error checking bridges on destination node '%s'" %
1058 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1061 def _CheckInstanceBridgesExist(lu, instance, node=None):
1062 """Check that the brigdes needed by an instance exist.
1066 node = instance.primary_node
1067 _CheckNicsBridgesExist(lu, instance.nics, node)
1070 def _CheckOSVariant(os_obj, name):
1071 """Check whether an OS name conforms to the os variants specification.
1073 @type os_obj: L{objects.OS}
1074 @param os_obj: OS object to check
1076 @param name: OS name passed by the user, to check for validity
1079 if not os_obj.supported_variants:
1081 variant = objects.OS.GetVariant(name)
1083 raise errors.OpPrereqError("OS name must include a variant",
1086 if variant not in os_obj.supported_variants:
1087 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1090 def _GetNodeInstancesInner(cfg, fn):
1091 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1094 def _GetNodeInstances(cfg, node_name):
1095 """Returns a list of all primary and secondary instances on a node.
1099 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1102 def _GetNodePrimaryInstances(cfg, node_name):
1103 """Returns primary instances on a node.
1106 return _GetNodeInstancesInner(cfg,
1107 lambda inst: node_name == inst.primary_node)
1110 def _GetNodeSecondaryInstances(cfg, node_name):
1111 """Returns secondary instances on a node.
1114 return _GetNodeInstancesInner(cfg,
1115 lambda inst: node_name in inst.secondary_nodes)
1118 def _GetStorageTypeArgs(cfg, storage_type):
1119 """Returns the arguments for a storage type.
1122 # Special case for file storage
1123 if storage_type == constants.ST_FILE:
1124 # storage.FileStorage wants a list of storage directories
1125 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1130 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1133 for dev in instance.disks:
1134 cfg.SetDiskID(dev, node_name)
1136 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1137 result.Raise("Failed to get disk status from node %s" % node_name,
1138 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1140 for idx, bdev_status in enumerate(result.payload):
1141 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1147 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1148 """Check the sanity of iallocator and node arguments and use the
1149 cluster-wide iallocator if appropriate.
1151 Check that at most one of (iallocator, node) is specified. If none is
1152 specified, then the LU's opcode's iallocator slot is filled with the
1153 cluster-wide default iallocator.
1155 @type iallocator_slot: string
1156 @param iallocator_slot: the name of the opcode iallocator slot
1157 @type node_slot: string
1158 @param node_slot: the name of the opcode target node slot
1161 node = getattr(lu.op, node_slot, None)
1162 iallocator = getattr(lu.op, iallocator_slot, None)
1164 if node is not None and iallocator is not None:
1165 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1167 elif node is None and iallocator is None:
1168 default_iallocator = lu.cfg.GetDefaultIAllocator()
1169 if default_iallocator:
1170 setattr(lu.op, iallocator_slot, default_iallocator)
1172 raise errors.OpPrereqError("No iallocator or node given and no"
1173 " cluster-wide default iallocator found;"
1174 " please specify either an iallocator or a"
1175 " node, or set a cluster-wide default"
1179 class LUClusterPostInit(LogicalUnit):
1180 """Logical unit for running hooks after cluster initialization.
1183 HPATH = "cluster-init"
1184 HTYPE = constants.HTYPE_CLUSTER
1186 def BuildHooksEnv(self):
1191 "OP_TARGET": self.cfg.GetClusterName(),
1194 def BuildHooksNodes(self):
1195 """Build hooks nodes.
1198 return ([], [self.cfg.GetMasterNode()])
1200 def Exec(self, feedback_fn):
1207 class LUClusterDestroy(LogicalUnit):
1208 """Logical unit for destroying the cluster.
1211 HPATH = "cluster-destroy"
1212 HTYPE = constants.HTYPE_CLUSTER
1214 def BuildHooksEnv(self):
1219 "OP_TARGET": self.cfg.GetClusterName(),
1222 def BuildHooksNodes(self):
1223 """Build hooks nodes.
1228 def CheckPrereq(self):
1229 """Check prerequisites.
1231 This checks whether the cluster is empty.
1233 Any errors are signaled by raising errors.OpPrereqError.
1236 master = self.cfg.GetMasterNode()
1238 nodelist = self.cfg.GetNodeList()
1239 if len(nodelist) != 1 or nodelist[0] != master:
1240 raise errors.OpPrereqError("There are still %d node(s) in"
1241 " this cluster." % (len(nodelist) - 1),
1243 instancelist = self.cfg.GetInstanceList()
1245 raise errors.OpPrereqError("There are still %d instance(s) in"
1246 " this cluster." % len(instancelist),
1249 def Exec(self, feedback_fn):
1250 """Destroys the cluster.
1253 master = self.cfg.GetMasterNode()
1255 # Run post hooks on master node before it's removed
1256 _RunPostHook(self, master)
1258 result = self.rpc.call_node_stop_master(master, False)
1259 result.Raise("Could not disable the master role")
1264 def _VerifyCertificate(filename):
1265 """Verifies a certificate for L{LUClusterVerifyConfig}.
1267 @type filename: string
1268 @param filename: Path to PEM file
1272 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1273 utils.ReadFile(filename))
1274 except Exception, err: # pylint: disable-msg=W0703
1275 return (LUClusterVerifyConfig.ETYPE_ERROR,
1276 "Failed to load X509 certificate %s: %s" % (filename, err))
1279 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1280 constants.SSL_CERT_EXPIRATION_ERROR)
1283 fnamemsg = "While verifying %s: %s" % (filename, msg)
1288 return (None, fnamemsg)
1289 elif errcode == utils.CERT_WARNING:
1290 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1291 elif errcode == utils.CERT_ERROR:
1292 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1294 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1297 def _GetAllHypervisorParameters(cluster, instances):
1298 """Compute the set of all hypervisor parameters.
1300 @type cluster: L{objects.Cluster}
1301 @param cluster: the cluster object
1302 @param instances: list of L{objects.Instance}
1303 @param instances: additional instances from which to obtain parameters
1304 @rtype: list of (origin, hypervisor, parameters)
1305 @return: a list with all parameters found, indicating the hypervisor they
1306 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1311 for hv_name in cluster.enabled_hypervisors:
1312 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1314 for os_name, os_hvp in cluster.os_hvp.items():
1315 for hv_name, hv_params in os_hvp.items():
1317 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1318 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1320 # TODO: collapse identical parameter values in a single one
1321 for instance in instances:
1322 if instance.hvparams:
1323 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1324 cluster.FillHV(instance)))
1329 class _VerifyErrors(object):
1330 """Mix-in for cluster/group verify LUs.
1332 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1333 self.op and self._feedback_fn to be available.)
1336 TCLUSTER = "cluster"
1338 TINSTANCE = "instance"
1340 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1341 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1342 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1343 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1344 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1345 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1346 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1347 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1348 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1349 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1350 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1351 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1352 ENODEDRBD = (TNODE, "ENODEDRBD")
1353 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1354 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1355 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1356 ENODEHV = (TNODE, "ENODEHV")
1357 ENODELVM = (TNODE, "ENODELVM")
1358 ENODEN1 = (TNODE, "ENODEN1")
1359 ENODENET = (TNODE, "ENODENET")
1360 ENODEOS = (TNODE, "ENODEOS")
1361 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1362 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1363 ENODERPC = (TNODE, "ENODERPC")
1364 ENODESSH = (TNODE, "ENODESSH")
1365 ENODEVERSION = (TNODE, "ENODEVERSION")
1366 ENODESETUP = (TNODE, "ENODESETUP")
1367 ENODETIME = (TNODE, "ENODETIME")
1368 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1370 ETYPE_FIELD = "code"
1371 ETYPE_ERROR = "ERROR"
1372 ETYPE_WARNING = "WARNING"
1374 def _Error(self, ecode, item, msg, *args, **kwargs):
1375 """Format an error message.
1377 Based on the opcode's error_codes parameter, either format a
1378 parseable error code, or a simpler error string.
1380 This must be called only from Exec and functions called from Exec.
1383 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1385 # first complete the msg
1388 # then format the whole message
1389 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1390 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1396 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1397 # and finally report it via the feedback_fn
1398 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1400 def _ErrorIf(self, cond, *args, **kwargs):
1401 """Log an error message if the passed condition is True.
1405 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1407 self._Error(*args, **kwargs)
1408 # do not mark the operation as failed for WARN cases only
1409 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1410 self.bad = self.bad or cond
1413 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1414 """Verifies the cluster config.
1419 def _VerifyHVP(self, hvp_data):
1420 """Verifies locally the syntax of the hypervisor parameters.
1423 for item, hv_name, hv_params in hvp_data:
1424 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1427 hv_class = hypervisor.GetHypervisor(hv_name)
1428 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1429 hv_class.CheckParameterSyntax(hv_params)
1430 except errors.GenericError, err:
1431 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1433 def ExpandNames(self):
1434 # Information can be safely retrieved as the BGL is acquired in exclusive
1436 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1437 self.all_node_info = self.cfg.GetAllNodesInfo()
1438 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1439 self.needed_locks = {}
1441 def Exec(self, feedback_fn):
1442 """Verify integrity of cluster, performing various test on nodes.
1446 self._feedback_fn = feedback_fn
1448 feedback_fn("* Verifying cluster config")
1450 for msg in self.cfg.VerifyConfig():
1451 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1453 feedback_fn("* Verifying cluster certificate files")
1455 for cert_filename in constants.ALL_CERT_FILES:
1456 (errcode, msg) = _VerifyCertificate(cert_filename)
1457 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1459 feedback_fn("* Verifying hypervisor parameters")
1461 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1462 self.all_inst_info.values()))
1464 feedback_fn("* Verifying all nodes belong to an existing group")
1466 # We do this verification here because, should this bogus circumstance
1467 # occur, it would never be caught by VerifyGroup, which only acts on
1468 # nodes/instances reachable from existing node groups.
1470 dangling_nodes = set(node.name for node in self.all_node_info.values()
1471 if node.group not in self.all_group_info)
1473 dangling_instances = {}
1474 no_node_instances = []
1476 for inst in self.all_inst_info.values():
1477 if inst.primary_node in dangling_nodes:
1478 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1479 elif inst.primary_node not in self.all_node_info:
1480 no_node_instances.append(inst.name)
1485 utils.CommaJoin(dangling_instances.get(node.name,
1487 for node in dangling_nodes]
1489 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1490 "the following nodes (and their instances) belong to a non"
1491 " existing group: %s", utils.CommaJoin(pretty_dangling))
1493 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1494 "the following instances have a non-existing primary-node:"
1495 " %s", utils.CommaJoin(no_node_instances))
1497 return (not self.bad, [g.name for g in self.all_group_info.values()])
1500 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1501 """Verifies the status of a node group.
1504 HPATH = "cluster-verify"
1505 HTYPE = constants.HTYPE_CLUSTER
1508 _HOOKS_INDENT_RE = re.compile("^", re.M)
1510 class NodeImage(object):
1511 """A class representing the logical and physical status of a node.
1514 @ivar name: the node name to which this object refers
1515 @ivar volumes: a structure as returned from
1516 L{ganeti.backend.GetVolumeList} (runtime)
1517 @ivar instances: a list of running instances (runtime)
1518 @ivar pinst: list of configured primary instances (config)
1519 @ivar sinst: list of configured secondary instances (config)
1520 @ivar sbp: dictionary of {primary-node: list of instances} for all
1521 instances for which this node is secondary (config)
1522 @ivar mfree: free memory, as reported by hypervisor (runtime)
1523 @ivar dfree: free disk, as reported by the node (runtime)
1524 @ivar offline: the offline status (config)
1525 @type rpc_fail: boolean
1526 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1527 not whether the individual keys were correct) (runtime)
1528 @type lvm_fail: boolean
1529 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1530 @type hyp_fail: boolean
1531 @ivar hyp_fail: whether the RPC call didn't return the instance list
1532 @type ghost: boolean
1533 @ivar ghost: whether this is a known node or not (config)
1534 @type os_fail: boolean
1535 @ivar os_fail: whether the RPC call didn't return valid OS data
1537 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1538 @type vm_capable: boolean
1539 @ivar vm_capable: whether the node can host instances
1542 def __init__(self, offline=False, name=None, vm_capable=True):
1551 self.offline = offline
1552 self.vm_capable = vm_capable
1553 self.rpc_fail = False
1554 self.lvm_fail = False
1555 self.hyp_fail = False
1557 self.os_fail = False
1560 def ExpandNames(self):
1561 # This raises errors.OpPrereqError on its own:
1562 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1564 # Get instances in node group; this is unsafe and needs verification later
1565 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1567 self.needed_locks = {
1568 locking.LEVEL_INSTANCE: inst_names,
1569 locking.LEVEL_NODEGROUP: [self.group_uuid],
1570 locking.LEVEL_NODE: [],
1573 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1575 def DeclareLocks(self, level):
1576 if level == locking.LEVEL_NODE:
1577 # Get members of node group; this is unsafe and needs verification later
1578 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1580 all_inst_info = self.cfg.GetAllInstancesInfo()
1582 # In Exec(), we warn about mirrored instances that have primary and
1583 # secondary living in separate node groups. To fully verify that
1584 # volumes for these instances are healthy, we will need to do an
1585 # extra call to their secondaries. We ensure here those nodes will
1587 for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1588 # Important: access only the instances whose lock is owned
1589 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1590 nodes.update(all_inst_info[inst].secondary_nodes)
1592 self.needed_locks[locking.LEVEL_NODE] = nodes
1594 def CheckPrereq(self):
1595 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1596 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1599 group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1601 unlocked_instances = \
1602 group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1605 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1606 utils.CommaJoin(unlocked_nodes))
1608 if unlocked_instances:
1609 raise errors.OpPrereqError("Missing lock for instances: %s" %
1610 utils.CommaJoin(unlocked_instances))
1612 self.all_node_info = self.cfg.GetAllNodesInfo()
1613 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1615 self.my_node_names = utils.NiceSort(group_nodes)
1616 self.my_inst_names = utils.NiceSort(group_instances)
1618 self.my_node_info = dict((name, self.all_node_info[name])
1619 for name in self.my_node_names)
1621 self.my_inst_info = dict((name, self.all_inst_info[name])
1622 for name in self.my_inst_names)
1624 # We detect here the nodes that will need the extra RPC calls for verifying
1625 # split LV volumes; they should be locked.
1626 extra_lv_nodes = set()
1628 for inst in self.my_inst_info.values():
1629 if inst.disk_template in constants.DTS_INT_MIRROR:
1630 group = self.my_node_info[inst.primary_node].group
1631 for nname in inst.secondary_nodes:
1632 if self.all_node_info[nname].group != group:
1633 extra_lv_nodes.add(nname)
1635 unlocked_lv_nodes = \
1636 extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1638 if unlocked_lv_nodes:
1639 raise errors.OpPrereqError("these nodes could be locked: %s" %
1640 utils.CommaJoin(unlocked_lv_nodes))
1641 self.extra_lv_nodes = list(extra_lv_nodes)
1643 def _VerifyNode(self, ninfo, nresult):
1644 """Perform some basic validation on data returned from a node.
1646 - check the result data structure is well formed and has all the
1648 - check ganeti version
1650 @type ninfo: L{objects.Node}
1651 @param ninfo: the node to check
1652 @param nresult: the results from the node
1654 @return: whether overall this call was successful (and we can expect
1655 reasonable values in the respose)
1659 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661 # main result, nresult should be a non-empty dict
1662 test = not nresult or not isinstance(nresult, dict)
1663 _ErrorIf(test, self.ENODERPC, node,
1664 "unable to verify node: no data returned")
1668 # compares ganeti version
1669 local_version = constants.PROTOCOL_VERSION
1670 remote_version = nresult.get("version", None)
1671 test = not (remote_version and
1672 isinstance(remote_version, (list, tuple)) and
1673 len(remote_version) == 2)
1674 _ErrorIf(test, self.ENODERPC, node,
1675 "connection to node returned invalid data")
1679 test = local_version != remote_version[0]
1680 _ErrorIf(test, self.ENODEVERSION, node,
1681 "incompatible protocol versions: master %s,"
1682 " node %s", local_version, remote_version[0])
1686 # node seems compatible, we can actually try to look into its results
1688 # full package version
1689 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1690 self.ENODEVERSION, node,
1691 "software version mismatch: master %s, node %s",
1692 constants.RELEASE_VERSION, remote_version[1],
1693 code=self.ETYPE_WARNING)
1695 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1696 if ninfo.vm_capable and isinstance(hyp_result, dict):
1697 for hv_name, hv_result in hyp_result.iteritems():
1698 test = hv_result is not None
1699 _ErrorIf(test, self.ENODEHV, node,
1700 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1702 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1703 if ninfo.vm_capable and isinstance(hvp_result, list):
1704 for item, hv_name, hv_result in hvp_result:
1705 _ErrorIf(True, self.ENODEHV, node,
1706 "hypervisor %s parameter verify failure (source %s): %s",
1707 hv_name, item, hv_result)
1709 test = nresult.get(constants.NV_NODESETUP,
1710 ["Missing NODESETUP results"])
1711 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1716 def _VerifyNodeTime(self, ninfo, nresult,
1717 nvinfo_starttime, nvinfo_endtime):
1718 """Check the node time.
1720 @type ninfo: L{objects.Node}
1721 @param ninfo: the node to check
1722 @param nresult: the remote results for the node
1723 @param nvinfo_starttime: the start time of the RPC call
1724 @param nvinfo_endtime: the end time of the RPC call
1728 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1730 ntime = nresult.get(constants.NV_TIME, None)
1732 ntime_merged = utils.MergeTime(ntime)
1733 except (ValueError, TypeError):
1734 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1737 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1738 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1739 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1740 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1744 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1745 "Node time diverges by at least %s from master node time",
1748 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1749 """Check the node LVM results.
1751 @type ninfo: L{objects.Node}
1752 @param ninfo: the node to check
1753 @param nresult: the remote results for the node
1754 @param vg_name: the configured VG name
1761 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1763 # checks vg existence and size > 20G
1764 vglist = nresult.get(constants.NV_VGLIST, None)
1766 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1768 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1769 constants.MIN_VG_SIZE)
1770 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1773 pvlist = nresult.get(constants.NV_PVLIST, None)
1774 test = pvlist is None
1775 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1777 # check that ':' is not present in PV names, since it's a
1778 # special character for lvcreate (denotes the range of PEs to
1780 for _, pvname, owner_vg in pvlist:
1781 test = ":" in pvname
1782 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1783 " '%s' of VG '%s'", pvname, owner_vg)
1785 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1786 """Check the node bridges.
1788 @type ninfo: L{objects.Node}
1789 @param ninfo: the node to check
1790 @param nresult: the remote results for the node
1791 @param bridges: the expected list of bridges
1798 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1800 missing = nresult.get(constants.NV_BRIDGES, None)
1801 test = not isinstance(missing, list)
1802 _ErrorIf(test, self.ENODENET, node,
1803 "did not return valid bridge information")
1805 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1806 utils.CommaJoin(sorted(missing)))
1808 def _VerifyNodeNetwork(self, ninfo, nresult):
1809 """Check the node network connectivity results.
1811 @type ninfo: L{objects.Node}
1812 @param ninfo: the node to check
1813 @param nresult: the remote results for the node
1817 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1819 test = constants.NV_NODELIST not in nresult
1820 _ErrorIf(test, self.ENODESSH, node,
1821 "node hasn't returned node ssh connectivity data")
1823 if nresult[constants.NV_NODELIST]:
1824 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1825 _ErrorIf(True, self.ENODESSH, node,
1826 "ssh communication with node '%s': %s", a_node, a_msg)
1828 test = constants.NV_NODENETTEST not in nresult
1829 _ErrorIf(test, self.ENODENET, node,
1830 "node hasn't returned node tcp connectivity data")
1832 if nresult[constants.NV_NODENETTEST]:
1833 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1835 _ErrorIf(True, self.ENODENET, node,
1836 "tcp communication with node '%s': %s",
1837 anode, nresult[constants.NV_NODENETTEST][anode])
1839 test = constants.NV_MASTERIP not in nresult
1840 _ErrorIf(test, self.ENODENET, node,
1841 "node hasn't returned node master IP reachability data")
1843 if not nresult[constants.NV_MASTERIP]:
1844 if node == self.master_node:
1845 msg = "the master node cannot reach the master IP (not configured?)"
1847 msg = "cannot reach the master IP"
1848 _ErrorIf(True, self.ENODENET, node, msg)
1850 def _VerifyInstance(self, instance, instanceconfig, node_image,
1852 """Verify an instance.
1854 This function checks to see if the required block devices are
1855 available on the instance's node.
1858 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1859 node_current = instanceconfig.primary_node
1861 node_vol_should = {}
1862 instanceconfig.MapLVsByNode(node_vol_should)
1864 for node in node_vol_should:
1865 n_img = node_image[node]
1866 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1867 # ignore missing volumes on offline or broken nodes
1869 for volume in node_vol_should[node]:
1870 test = volume not in n_img.volumes
1871 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1872 "volume %s missing on node %s", volume, node)
1874 if instanceconfig.admin_up:
1875 pri_img = node_image[node_current]
1876 test = instance not in pri_img.instances and not pri_img.offline
1877 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1878 "instance not running on its primary node %s",
1881 diskdata = [(nname, success, status, idx)
1882 for (nname, disks) in diskstatus.items()
1883 for idx, (success, status) in enumerate(disks)]
1885 for nname, success, bdev_status, idx in diskdata:
1886 # the 'ghost node' construction in Exec() ensures that we have a
1888 snode = node_image[nname]
1889 bad_snode = snode.ghost or snode.offline
1890 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1891 self.EINSTANCEFAULTYDISK, instance,
1892 "couldn't retrieve status for disk/%s on %s: %s",
1893 idx, nname, bdev_status)
1894 _ErrorIf((instanceconfig.admin_up and success and
1895 bdev_status.ldisk_status == constants.LDS_FAULTY),
1896 self.EINSTANCEFAULTYDISK, instance,
1897 "disk/%s on %s is faulty", idx, nname)
1899 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1900 """Verify if there are any unknown volumes in the cluster.
1902 The .os, .swap and backup volumes are ignored. All other volumes are
1903 reported as unknown.
1905 @type reserved: L{ganeti.utils.FieldSet}
1906 @param reserved: a FieldSet of reserved volume names
1909 for node, n_img in node_image.items():
1910 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1911 # skip non-healthy nodes
1913 for volume in n_img.volumes:
1914 test = ((node not in node_vol_should or
1915 volume not in node_vol_should[node]) and
1916 not reserved.Matches(volume))
1917 self._ErrorIf(test, self.ENODEORPHANLV, node,
1918 "volume %s is unknown", volume)
1920 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1921 """Verify N+1 Memory Resilience.
1923 Check that if one single node dies we can still start all the
1924 instances it was primary for.
1927 cluster_info = self.cfg.GetClusterInfo()
1928 for node, n_img in node_image.items():
1929 # This code checks that every node which is now listed as a
1930 # secondary has enough memory to host all the instances it would
1931 # have to take over, should a single other node in the cluster fail.
1932 # FIXME: not ready for failover to an arbitrary node
1933 # FIXME: does not support file-backed instances
1934 # WARNING: we currently take into account down instances as well
1935 # as up ones, considering that even if they're down someone
1936 # might want to start them even in the event of a node failure.
1938 # we're skipping offline nodes from the N+1 warning, since
1939 # most likely we don't have good memory information from them;
1940 # we already list instances living on such nodes, and that's
1943 for prinode, instances in n_img.sbp.items():
1945 for instance in instances:
1946 bep = cluster_info.FillBE(instance_cfg[instance])
1947 if bep[constants.BE_AUTO_BALANCE]:
1948 needed_mem += bep[constants.BE_MEMORY]
1949 test = n_img.mfree < needed_mem
1950 self._ErrorIf(test, self.ENODEN1, node,
1951 "not enough memory to accomodate instance failovers"
1952 " should node %s fail (%dMiB needed, %dMiB available)",
1953 prinode, needed_mem, n_img.mfree)
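# Worked example (hypothetical numbers): if this node is secondary for two
# auto-balanced instances of primary node "nodeA", with BE_MEMORY of 1024
# and 2048 MiB, then needed_mem for prinode "nodeA" is 3072 MiB and an
# ENODEN1 error is reported whenever the node's reported mfree drops below
# that value.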
1956 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1957 (files_all, files_all_opt, files_mc, files_vm)):
1958 """Verifies file checksums collected from all nodes.
1960 @param errorif: Callback for reporting errors
1961 @param nodeinfo: List of L{objects.Node} objects
1962 @param master_node: Name of master node
1963 @param all_nvinfo: RPC results
1966 node_names = frozenset(node.name for node in nodeinfo)
1968 assert master_node in node_names
1969 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1970 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1971 "Found file listed in more than one file list"
1973 # Define functions determining which nodes to consider for a file
1974 file2nodefn = dict([(filename, fn)
1975 for (files, fn) in [(files_all, None),
1976 (files_all_opt, None),
1977 (files_mc, lambda node: (node.master_candidate or
1978 node.name == master_node)),
1979 (files_vm, lambda node: node.vm_capable)]
1980 for filename in files])
1982 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1984 for node in nodeinfo:
1985 nresult = all_nvinfo[node.name]
1987 if nresult.fail_msg or not nresult.payload:
1990 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1992 test = not (node_files and isinstance(node_files, dict))
1993 errorif(test, cls.ENODEFILECHECK, node.name,
1994 "Node did not return file checksum data")
1998 for (filename, checksum) in node_files.items():
1999 # Check if the file should be considered for a node
2000 fn = file2nodefn[filename]
2001 if fn is None or fn(node):
2002 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2004 for (filename, checksums) in fileinfo.items():
2005 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2007 # Nodes having the file
2008 with_file = frozenset(node_name
2009 for nodes in fileinfo[filename].values()
2010 for node_name in nodes)
2012 # Nodes missing file
2013 missing_file = node_names - with_file
2015 if filename in files_all_opt:
2017 errorif(missing_file and missing_file != node_names,
2018 cls.ECLUSTERFILECHECK, None,
2019 "File %s is optional, but it must exist on all or no nodes (not"
2021 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2023 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2024 "File %s is missing from node(s) %s", filename,
2025 utils.CommaJoin(utils.NiceSort(missing_file)))
2027 # See if there are multiple versions of the file
2028 test = len(checksums) > 1
2030 variants = ["variant %s on %s" %
2031 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2032 for (idx, (checksum, nodes)) in
2033 enumerate(sorted(checksums.items()))]
2037 errorif(test, cls.ECLUSTERFILECHECK, None,
2038 "File %s found with %s different checksums (%s)",
2039 filename, len(checksums), "; ".join(variants))
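# Illustrative sketch of the structure checked above (hypothetical data):
#   fileinfo["/etc/example.conf"] == {"<checksum A>": set(["node1", "node2"]),
#                                     "<checksum B>": set(["node3"])}
# is reported as two variants of the same file, while a file from
# files_all_opt only triggers an error when it exists on a strict, non-empty
# subset of the nodes.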
2041 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2043 """Verifies and the node DRBD status.
2045 @type ninfo: L{objects.Node}
2046 @param ninfo: the node to check
2047 @param nresult: the remote results for the node
2048 @param instanceinfo: the dict of instances
2049 @param drbd_helper: the configured DRBD usermode helper
2050 @param drbd_map: the DRBD map as returned by
2051 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2055 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2058 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2059 test = (helper_result is None)
2060 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2061 "no drbd usermode helper returned")
2063 status, payload = helper_result
2065 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2066 "drbd usermode helper check unsuccessful: %s", payload)
2067 test = status and (payload != drbd_helper)
2068 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2069 "wrong drbd usermode helper: %s", payload)
2071 # compute the DRBD minors
2073 for minor, instance in drbd_map[node].items():
2074 test = instance not in instanceinfo
2075 _ErrorIf(test, self.ECLUSTERCFG, None,
2076 "ghost instance '%s' in temporary DRBD map", instance)
2077 # ghost instance should not be running, but otherwise we
2078 # don't give double warnings (both ghost instance and
2079 # unallocated minor in use)
2081 node_drbd[minor] = (instance, False)
2083 instance = instanceinfo[instance]
2084 node_drbd[minor] = (instance.name, instance.admin_up)
2086 # and now check them
2087 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2088 test = not isinstance(used_minors, (tuple, list))
2089 _ErrorIf(test, self.ENODEDRBD, node,
2090 "cannot parse drbd status file: %s", str(used_minors))
2092 # we cannot check drbd status
2095 for minor, (iname, must_exist) in node_drbd.items():
2096 test = minor not in used_minors and must_exist
2097 _ErrorIf(test, self.ENODEDRBD, node,
2098 "drbd minor %d of instance %s is not active", minor, iname)
2099 for minor in used_minors:
2100 test = minor not in node_drbd
2101 _ErrorIf(test, self.ENODEDRBD, node,
2102 "unallocated drbd minor %d is in use", minor)
2104 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2105 """Builds the node OS structures.
2107 @type ninfo: L{objects.Node}
2108 @param ninfo: the node to check
2109 @param nresult: the remote results for the node
2110 @param nimg: the node image object
2114 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2116 remote_os = nresult.get(constants.NV_OSLIST, None)
2117 test = (not isinstance(remote_os, list) or
2118 not compat.all(isinstance(v, list) and len(v) == 7
2119 for v in remote_os))
2121 _ErrorIf(test, self.ENODEOS, node,
2122 "node hasn't returned valid OS data")
2131 for (name, os_path, status, diagnose,
2132 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2134 if name not in os_dict:
2137 # parameters is a list of lists instead of list of tuples due to
2138 # JSON lacking a real tuple type, fix it:
2139 parameters = [tuple(v) for v in parameters]
2140 os_dict[name].append((os_path, status, diagnose,
2141 set(variants), set(parameters), set(api_ver)))
2143 nimg.oslist = os_dict
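# Illustrative note: after this update nimg.oslist maps each OS name to a
# list of (path, status, diagnose, variants, parameters, api_versions)
# tuples, one per location the OS was found in on the node; a healthy node
# normally has exactly one entry per OS.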
2145 def _VerifyNodeOS(self, ninfo, nimg, base):
2146 """Verifies the node OS list.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nimg: the node image object
2151 @param base: the 'template' node we match against (e.g. from the master)
2155 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2157 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2159 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2160 for os_name, os_data in nimg.oslist.items():
2161 assert os_data, "Empty OS status for OS %s?!" % os_name
2162 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2163 _ErrorIf(not f_status, self.ENODEOS, node,
2164 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2165 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2166 "OS '%s' has multiple entries (first one shadows the rest): %s",
2167 os_name, utils.CommaJoin([v[0] for v in os_data]))
2168 # this will be caught in the backend too
2169 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2170 and not f_var, self.ENODEOS, node,
2171 "OS %s with API at least %d does not declare any variant",
2172 os_name, constants.OS_API_V15)
2173 # comparisons with the 'base' image
2174 test = os_name not in base.oslist
2175 _ErrorIf(test, self.ENODEOS, node,
2176 "Extra OS %s not present on reference node (%s)",
2180 assert base.oslist[os_name], "Base node has empty OS status?"
2181 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2183 # base OS is invalid, skipping
2185 for kind, a, b in [("API version", f_api, b_api),
2186 ("variants list", f_var, b_var),
2187 ("parameters", beautify_params(f_param),
2188 beautify_params(b_param))]:
2189 _ErrorIf(a != b, self.ENODEOS, node,
2190 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2191 kind, os_name, base.name,
2192 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2194 # check any missing OSes
2195 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2196 _ErrorIf(missing, self.ENODEOS, node,
2197 "OSes present on reference node %s but missing on this node: %s",
2198 base.name, utils.CommaJoin(missing))
2200 def _VerifyOob(self, ninfo, nresult):
2201 """Verifies out of band functionality of a node.
2203 @type ninfo: L{objects.Node}
2204 @param ninfo: the node to check
2205 @param nresult: the remote results for the node
2209 # We just have to verify the paths on master and/or master candidates
2210 # as the oob helper is invoked on the master
2211 if ((ninfo.master_candidate or ninfo.master_capable) and
2212 constants.NV_OOB_PATHS in nresult):
2213 for path_result in nresult[constants.NV_OOB_PATHS]:
2214 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2216 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2217 """Verifies and updates the node volume data.
2219 This function will update a L{NodeImage}'s internal structures
2220 with data from the remote call.
2222 @type ninfo: L{objects.Node}
2223 @param ninfo: the node to check
2224 @param nresult: the remote results for the node
2225 @param nimg: the node image object
2226 @param vg_name: the configured VG name
2230 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2232 nimg.lvm_fail = True
2233 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2236 elif isinstance(lvdata, basestring):
2237 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2238 utils.SafeEncode(lvdata))
2239 elif not isinstance(lvdata, dict):
2240 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2242 nimg.volumes = lvdata
2243 nimg.lvm_fail = False
2245 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2246 """Verifies and updates the node instance list.
2248 If the listing was successful, then updates this node's instance
2249 list. Otherwise, it marks the RPC call as failed for the instance list key.
2252 @type ninfo: L{objects.Node}
2253 @param ninfo: the node to check
2254 @param nresult: the remote results for the node
2255 @param nimg: the node image object
2258 idata = nresult.get(constants.NV_INSTANCELIST, None)
2259 test = not isinstance(idata, list)
2260 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2261 " (instancelist): %s", utils.SafeEncode(str(idata)))
2263 nimg.hyp_fail = True
2265 nimg.instances = idata
2267 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2268 """Verifies and computes a node information map
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param nimg: the node image object
2274 @param vg_name: the configured VG name
2278 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2280 # try to read free memory (from the hypervisor)
2281 hv_info = nresult.get(constants.NV_HVINFO, None)
2282 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2283 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2286 nimg.mfree = int(hv_info["memory_free"])
2287 except (ValueError, TypeError):
2288 _ErrorIf(True, self.ENODERPC, node,
2289 "node returned invalid nodeinfo, check hypervisor")
2291 # FIXME: devise a free space model for file based instances as well
2292 if vg_name is not None:
2293 test = (constants.NV_VGLIST not in nresult or
2294 vg_name not in nresult[constants.NV_VGLIST])
2295 _ErrorIf(test, self.ENODELVM, node,
2296 "node didn't return data for the volume group '%s'"
2297 " - it is either missing or broken", vg_name)
2300 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2301 except (ValueError, TypeError):
2302 _ErrorIf(True, self.ENODERPC, node,
2303 "node returned invalid LVM info, check LVM status")
2305 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2306 """Gets per-disk status information for all instances.
2308 @type nodelist: list of strings
2309 @param nodelist: Node names
2310 @type node_image: dict of (name, L{objects.Node})
2311 @param node_image: Node objects
2312 @type instanceinfo: dict of (name, L{objects.Instance})
2313 @param instanceinfo: Instance objects
2314 @rtype: {instance: {node: [(success, payload)]}}
2315 @return: a dictionary of per-instance dictionaries with nodes as
2316 keys and disk information as values; the disk information is a
2317 list of tuples (success, payload)
2320 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2323 node_disks_devonly = {}
2324 diskless_instances = set()
2325 diskless = constants.DT_DISKLESS
2327 for nname in nodelist:
2328 node_instances = list(itertools.chain(node_image[nname].pinst,
2329 node_image[nname].sinst))
2330 diskless_instances.update(inst for inst in node_instances
2331 if instanceinfo[inst].disk_template == diskless)
2332 disks = [(inst, disk)
2333 for inst in node_instances
2334 for disk in instanceinfo[inst].disks]
2337 # No need to collect data
2340 node_disks[nname] = disks
2342 # Creating copies as SetDiskID below will modify the objects and that can
2343 # lead to incorrect data returned from nodes
2344 devonly = [dev.Copy() for (_, dev) in disks]
2347 self.cfg.SetDiskID(dev, nname)
2349 node_disks_devonly[nname] = devonly
2351 assert len(node_disks) == len(node_disks_devonly)
2353 # Collect data from all nodes with disks
2354 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2357 assert len(result) == len(node_disks)
2361 for (nname, nres) in result.items():
2362 disks = node_disks[nname]
2365 # No data from this node
2366 data = len(disks) * [(False, "node offline")]
2369 _ErrorIf(msg, self.ENODERPC, nname,
2370 "while getting disk information: %s", msg)
2372 # No data from this node
2373 data = len(disks) * [(False, msg)]
2376 for idx, i in enumerate(nres.payload):
2377 if isinstance(i, (tuple, list)) and len(i) == 2:
2380 logging.warning("Invalid result from node %s, entry %d: %s",
2382 data.append((False, "Invalid result from the remote node"))
2384 for ((inst, _), status) in zip(disks, data):
2385 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2387 # Add empty entries for diskless instances.
2388 for inst in diskless_instances:
2389 assert inst not in instdisk
2392 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2393 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2394 compat.all(isinstance(s, (tuple, list)) and
2395 len(s) == 2 for s in statuses)
2396 for inst, nnames in instdisk.items()
2397 for nname, statuses in nnames.items())
2398 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2402 def BuildHooksEnv(self):
2405 Cluster-Verify hooks are only run in the post phase; if they fail, their
2406 output is logged in the verify output and the verification fails.
2410 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2413 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2414 for node in self.my_node_info.values())
2418 def BuildHooksNodes(self):
2419 """Build hooks nodes.
2422 assert self.my_node_names, ("Node list not gathered,"
2423 " has CheckPrereq been executed?")
2424 return ([], self.my_node_names)
2426 def Exec(self, feedback_fn):
2427 """Verify integrity of the node group, performing various test on nodes.
2430 # This method has too many local variables. pylint: disable-msg=R0914
2432 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2433 verbose = self.op.verbose
2434 self._feedback_fn = feedback_fn
2436 vg_name = self.cfg.GetVGName()
2437 drbd_helper = self.cfg.GetDRBDHelper()
2438 cluster = self.cfg.GetClusterInfo()
2439 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2440 hypervisors = cluster.enabled_hypervisors
2441 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2443 i_non_redundant = [] # Non redundant instances
2444 i_non_a_balanced = [] # Non auto-balanced instances
2445 n_offline = 0 # Count of offline nodes
2446 n_drained = 0 # Count of nodes being drained
2447 node_vol_should = {}
2449 # FIXME: verify OS list
2452 filemap = _ComputeAncillaryFiles(cluster, False)
2454 # do local checksums
2455 master_node = self.master_node = self.cfg.GetMasterNode()
2456 master_ip = self.cfg.GetMasterIP()
2458 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2460 # We will make nodes contact all nodes in their group, and one node from
2461 # every other group.
2462 # TODO: should it be a *random* node, different every time?
2463 online_nodes = [node.name for node in node_data_list if not node.offline]
2464 other_group_nodes = {}
2466 for name in sorted(self.all_node_info):
2467 node = self.all_node_info[name]
2468 if (node.group not in other_group_nodes
2469 and node.group != self.group_uuid
2470 and not node.offline):
2471 other_group_nodes[node.group] = node.name
2473 node_verify_param = {
2474 constants.NV_FILELIST:
2475 utils.UniqueSequence(filename
2476 for files in filemap
2477 for filename in files),
2478 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2479 constants.NV_HYPERVISOR: hypervisors,
2480 constants.NV_HVPARAMS:
2481 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2482 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2483 for node in node_data_list
2484 if not node.offline],
2485 constants.NV_INSTANCELIST: hypervisors,
2486 constants.NV_VERSION: None,
2487 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2488 constants.NV_NODESETUP: None,
2489 constants.NV_TIME: None,
2490 constants.NV_MASTERIP: (master_node, master_ip),
2491 constants.NV_OSLIST: None,
2492 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2495 if vg_name is not None:
2496 node_verify_param[constants.NV_VGLIST] = None
2497 node_verify_param[constants.NV_LVLIST] = vg_name
2498 node_verify_param[constants.NV_PVLIST] = [vg_name]
2499 node_verify_param[constants.NV_DRBDLIST] = None
2502 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2505 # FIXME: this needs to be changed per node-group, not cluster-wide
2507 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2508 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2509 bridges.add(default_nicpp[constants.NIC_LINK])
2510 for instance in self.my_inst_info.values():
2511 for nic in instance.nics:
2512 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2513 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2514 bridges.add(full_nic[constants.NIC_LINK])
2517 node_verify_param[constants.NV_BRIDGES] = list(bridges)
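# Illustrative note (hypothetical values): at this point node_verify_param
# maps NV_* keys to the arguments each remote check needs, e.g.
#   {constants.NV_LVLIST: "xenvg",
#    constants.NV_MASTERIP: (master_node, master_ip), ...}
# and is passed unchanged to every node via call_node_verify below.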
2519 # Build our expected cluster state
2520 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2522 vm_capable=node.vm_capable))
2523 for node in node_data_list)
2527 for node in self.all_node_info.values():
2528 path = _SupportsOob(self.cfg, node)
2529 if path and path not in oob_paths:
2530 oob_paths.append(path)
2533 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2535 for instance in self.my_inst_names:
2536 inst_config = self.my_inst_info[instance]
2538 for nname in inst_config.all_nodes:
2539 if nname not in node_image:
2540 gnode = self.NodeImage(name=nname)
2541 gnode.ghost = (nname not in self.all_node_info)
2542 node_image[nname] = gnode
2544 inst_config.MapLVsByNode(node_vol_should)
2546 pnode = inst_config.primary_node
2547 node_image[pnode].pinst.append(instance)
2549 for snode in inst_config.secondary_nodes:
2550 nimg = node_image[snode]
2551 nimg.sinst.append(instance)
2552 if pnode not in nimg.sbp:
2553 nimg.sbp[pnode] = []
2554 nimg.sbp[pnode].append(instance)
2556 # At this point, we have the in-memory data structures complete,
2557 # except for the runtime information, which we'll gather next
2559 # Due to the way our RPC system works, exact response times cannot be
2560 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2561 # time before and after executing the request, we can at least have a time
2563 nvinfo_starttime = time.time()
2564 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2566 self.cfg.GetClusterName())
2567 nvinfo_endtime = time.time()
2569 if self.extra_lv_nodes and vg_name is not None:
2571 self.rpc.call_node_verify(self.extra_lv_nodes,
2572 {constants.NV_LVLIST: vg_name},
2573 self.cfg.GetClusterName())
2575 extra_lv_nvinfo = {}
2577 all_drbd_map = self.cfg.ComputeDRBDMap()
2579 feedback_fn("* Gathering disk information (%s nodes)" %
2580 len(self.my_node_names))
2581 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2584 feedback_fn("* Verifying configuration file consistency")
2586 # If not all nodes are being checked, we need to make sure the master node
2587 # and a non-checked vm_capable node are in the list.
2588 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2590 vf_nvinfo = all_nvinfo.copy()
2591 vf_node_info = list(self.my_node_info.values())
2592 additional_nodes = []
2593 if master_node not in self.my_node_info:
2594 additional_nodes.append(master_node)
2595 vf_node_info.append(self.all_node_info[master_node])
2596 # Add the first vm_capable node we find which is not included
2597 for node in absent_nodes:
2598 nodeinfo = self.all_node_info[node]
2599 if nodeinfo.vm_capable and not nodeinfo.offline:
2600 additional_nodes.append(node)
2601 vf_node_info.append(self.all_node_info[node])
2603 key = constants.NV_FILELIST
2604 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2605 {key: node_verify_param[key]},
2606 self.cfg.GetClusterName()))
2608 vf_nvinfo = all_nvinfo
2609 vf_node_info = self.my_node_info.values()
2611 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2613 feedback_fn("* Verifying node status")
2617 for node_i in node_data_list:
2619 nimg = node_image[node]
2623 feedback_fn("* Skipping offline node %s" % (node,))
2627 if node == master_node:
2629 elif node_i.master_candidate:
2630 ntype = "master candidate"
2631 elif node_i.drained:
2637 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2639 msg = all_nvinfo[node].fail_msg
2640 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2642 nimg.rpc_fail = True
2645 nresult = all_nvinfo[node].payload
2647 nimg.call_ok = self._VerifyNode(node_i, nresult)
2648 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2649 self._VerifyNodeNetwork(node_i, nresult)
2650 self._VerifyOob(node_i, nresult)
2653 self._VerifyNodeLVM(node_i, nresult, vg_name)
2654 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2657 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2658 self._UpdateNodeInstances(node_i, nresult, nimg)
2659 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2660 self._UpdateNodeOS(node_i, nresult, nimg)
2662 if not nimg.os_fail:
2663 if refos_img is None:
2665 self._VerifyNodeOS(node_i, nimg, refos_img)
2666 self._VerifyNodeBridges(node_i, nresult, bridges)
2668 # Check whether all running instances are primary for the node. (This
2669 # can no longer be done from _VerifyInstance below, since some of the
2670 # wrong instances could be from other node groups.)
2671 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2673 for inst in non_primary_inst:
2674 test = inst in self.all_inst_info
2675 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2676 "instance should not run on node %s", node_i.name)
2677 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2678 "node is running unknown instance %s", inst)
2680 for node, result in extra_lv_nvinfo.items():
2681 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2682 node_image[node], vg_name)
2684 feedback_fn("* Verifying instance status")
2685 for instance in self.my_inst_names:
2687 feedback_fn("* Verifying instance %s" % instance)
2688 inst_config = self.my_inst_info[instance]
2689 self._VerifyInstance(instance, inst_config, node_image,
2691 inst_nodes_offline = []
2693 pnode = inst_config.primary_node
2694 pnode_img = node_image[pnode]
2695 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2696 self.ENODERPC, pnode, "instance %s, connection to"
2697 " primary node failed", instance)
2699 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2700 self.EINSTANCEBADNODE, instance,
2701 "instance is marked as running and lives on offline node %s",
2702 inst_config.primary_node)
2704 # If the instance is non-redundant we cannot survive losing its primary
2705 # node, so we are not N+1 compliant. On the other hand we have no disk
2706 # templates with more than one secondary, so that situation is not well supported either.
2708 # FIXME: does not support file-backed instances
2709 if not inst_config.secondary_nodes:
2710 i_non_redundant.append(instance)
2712 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2713 instance, "instance has multiple secondary nodes: %s",
2714 utils.CommaJoin(inst_config.secondary_nodes),
2715 code=self.ETYPE_WARNING)
2717 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2718 pnode = inst_config.primary_node
2719 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2720 instance_groups = {}
2722 for node in instance_nodes:
2723 instance_groups.setdefault(self.all_node_info[node].group,
2727 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2728 # Sort so that we always list the primary node first.
2729 for group, nodes in sorted(instance_groups.items(),
2730 key=lambda (_, nodes): pnode in nodes,
2733 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2734 instance, "instance has primary and secondary nodes in"
2735 " different groups: %s", utils.CommaJoin(pretty_list),
2736 code=self.ETYPE_WARNING)
2738 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2739 i_non_a_balanced.append(instance)
2741 for snode in inst_config.secondary_nodes:
2742 s_img = node_image[snode]
2743 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2744 "instance %s, connection to secondary node failed", instance)
2747 inst_nodes_offline.append(snode)
2749 # warn that the instance lives on offline nodes
2750 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2751 "instance has offline secondary node(s) %s",
2752 utils.CommaJoin(inst_nodes_offline))
2753 # ... or ghost/non-vm_capable nodes
2754 for node in inst_config.all_nodes:
2755 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2756 "instance lives on ghost node %s", node)
2757 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2758 instance, "instance lives on non-vm_capable node %s", node)
2760 feedback_fn("* Verifying orphan volumes")
2761 reserved = utils.FieldSet(*cluster.reserved_lvs)
2763 # We will get spurious "unknown volume" warnings if any node of this group
2764 # is secondary for an instance whose primary is in another group. To avoid
2765 # them, we find these instances and add their volumes to node_vol_should.
2766 for inst in self.all_inst_info.values():
2767 for secondary in inst.secondary_nodes:
2768 if (secondary in self.my_node_info
2769 and inst.name not in self.my_inst_info):
2770 inst.MapLVsByNode(node_vol_should)
2773 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2775 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2776 feedback_fn("* Verifying N+1 Memory redundancy")
2777 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2779 feedback_fn("* Other Notes")
2781 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2782 % len(i_non_redundant))
2784 if i_non_a_balanced:
2785 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2786 % len(i_non_a_balanced))
2789 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2792 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2796 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2797 """Analyze the post-hooks' result
2799 This method analyses the hook result, handles it, and sends some
2800 nicely-formatted feedback back to the user.
2802 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2803 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2804 @param hooks_results: the results of the multi-node hooks rpc call
2805 @param feedback_fn: function used to send feedback back to the caller
2806 @param lu_result: previous Exec result
2807 @return: the new Exec result, based on the previous result
2811 # We only really run POST phase hooks, and are only interested in
2813 if phase == constants.HOOKS_PHASE_POST:
2814 # Used to change hooks' output to proper indentation
2815 feedback_fn("* Hooks Results")
2816 assert hooks_results, "invalid result from hooks"
2818 for node_name in hooks_results:
2819 res = hooks_results[node_name]
2821 test = msg and not res.offline
2822 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2823 "Communication failure in hooks execution: %s", msg)
2824 if res.offline or msg:
2825 # No need to investigate payload if node is offline or gave an error.
2826 # override manually lu_result here as _ErrorIf only
2827 # overrides self.bad
2830 for script, hkr, output in res.payload:
2831 test = hkr == constants.HKR_FAIL
2832 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2833 "Script %s failed, output:", script)
2835 output = self._HOOKS_INDENT_RE.sub(' ', output)
2836 feedback_fn("%s" % output)
2842 class LUClusterVerifyDisks(NoHooksLU):
2843 """Verifies the cluster disks status.
2848 def ExpandNames(self):
2849 self.needed_locks = {
2850 locking.LEVEL_NODE: locking.ALL_SET,
2851 locking.LEVEL_INSTANCE: locking.ALL_SET,
2853 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2855 def Exec(self, feedback_fn):
2856 """Verify integrity of cluster disks.
2858 @rtype: tuple of three items
2859 @return: a tuple of (dict of node-to-node_error, list of instances
2860 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2864 result = res_nodes, res_instances, res_missing = {}, [], {}
2866 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2867 instances = self.cfg.GetAllInstancesInfo().values()
2870 for inst in instances:
2872 if not inst.admin_up:
2874 inst.MapLVsByNode(inst_lvs)
2875 # transform {iname: {node: [vol, ...]}} into {(node, vol): iname}
2876 for node, vol_list in inst_lvs.iteritems():
2877 for vol in vol_list:
2878 nv_dict[(node, vol)] = inst
2883 node_lvs = self.rpc.call_lv_list(nodes, [])
2884 for node, node_res in node_lvs.items():
2885 if node_res.offline:
2887 msg = node_res.fail_msg
2889 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2890 res_nodes[node] = msg
2893 lvs = node_res.payload
2894 for lv_name, (_, _, lv_online) in lvs.items():
2895 inst = nv_dict.pop((node, lv_name), None)
2896 if (not lv_online and inst is not None
2897 and inst.name not in res_instances):
2898 res_instances.append(inst.name)
2900 # any leftover items in nv_dict are missing LVs, let's arrange the
2902 for key, inst in nv_dict.iteritems():
2903 if inst.name not in res_missing:
2904 res_missing[inst.name] = []
2905 res_missing[inst.name].append(key)
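# A minimal, self-contained sketch (not used by any LU) of the mapping
# inversion performed inline in LUClusterVerifyDisks.Exec above; the helper
# name is illustrative only.
def _ExampleInvertLvMap(lvs_by_instance):
  """Turns {iname: {node: [vol, ...]}} into {(node, vol): iname}.

  Provided purely as documentation of the transformation commented on in
  LUClusterVerifyDisks.Exec.

  """
  nv_dict = {}
  for iname, node_vols in lvs_by_instance.items():
    for node, vol_list in node_vols.items():
      for vol in vol_list:
        nv_dict[(node, vol)] = iname
  return nv_dict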
2910 class LUClusterRepairDiskSizes(NoHooksLU):
2911 """Verifies the cluster disks sizes.
2916 def ExpandNames(self):
2917 if self.op.instances:
2918 self.wanted_names = _GetWantedInstances(self, self.op.instances)
2919 self.needed_locks = {
2920 locking.LEVEL_NODE: [],
2921 locking.LEVEL_INSTANCE: self.wanted_names,
2923 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2925 self.wanted_names = None
2926 self.needed_locks = {
2927 locking.LEVEL_NODE: locking.ALL_SET,
2928 locking.LEVEL_INSTANCE: locking.ALL_SET,
2930 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2932 def DeclareLocks(self, level):
2933 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2934 self._LockInstancesNodes(primary_only=True)
2936 def CheckPrereq(self):
2937 """Check prerequisites.
2939 This only checks the optional instance list against the existing names.
2942 if self.wanted_names is None:
2943 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2945 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2946 in self.wanted_names]
2948 def _EnsureChildSizes(self, disk):
2949 """Ensure children of the disk have the needed disk size.
2951 This is valid mainly for DRBD8 and fixes an issue where the
2952 children have a smaller disk size than the parent.
2954 @param disk: an L{ganeti.objects.Disk} object
2957 if disk.dev_type == constants.LD_DRBD8:
2958 assert disk.children, "Empty children for DRBD8?"
2959 fchild = disk.children[0]
2960 mismatch = fchild.size < disk.size
2962 self.LogInfo("Child disk has size %d, parent %d, fixing",
2963 fchild.size, disk.size)
2964 fchild.size = disk.size
2966 # and we recurse on this child only, not on the metadev
2967 return self._EnsureChildSizes(fchild) or mismatch
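# Illustrative note: for a DRBD8 disk the children are the data device and
# the meta device; only children[0] (the data device) is checked and grown
# here, which is why the recursion above deliberately skips the metadev.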
2971 def Exec(self, feedback_fn):
2972 """Verify the size of cluster disks.
2975 # TODO: check child disks too
2976 # TODO: check differences in size between primary/secondary nodes
2978 for instance in self.wanted_instances:
2979 pnode = instance.primary_node
2980 if pnode not in per_node_disks:
2981 per_node_disks[pnode] = []
2982 for idx, disk in enumerate(instance.disks):
2983 per_node_disks[pnode].append((instance, idx, disk))
2986 for node, dskl in per_node_disks.items():
2987 newl = [v[2].Copy() for v in dskl]
2989 self.cfg.SetDiskID(dsk, node)
2990 result = self.rpc.call_blockdev_getsize(node, newl)
2992 self.LogWarning("Failure in blockdev_getsize call to node"
2993 " %s, ignoring", node)
2995 if len(result.payload) != len(dskl):
2996 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2997 " result.payload=%s", node, len(dskl), result.payload)
2998 self.LogWarning("Invalid result from node %s, ignoring node results",
3001 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3003 self.LogWarning("Disk %d of instance %s did not return size"
3004 " information, ignoring", idx, instance.name)
3006 if not isinstance(size, (int, long)):
3007 self.LogWarning("Disk %d of instance %s did not return valid"
3008 " size information, ignoring", idx, instance.name)
3011 if size != disk.size:
3012 self.LogInfo("Disk %d of instance %s has mismatched size,"
3013 " correcting: recorded %d, actual %d", idx,
3014 instance.name, disk.size, size)
3016 self.cfg.Update(instance, feedback_fn)
3017 changed.append((instance.name, idx, size))
3018 if self._EnsureChildSizes(disk):
3019 self.cfg.Update(instance, feedback_fn)
3020 changed.append((instance.name, idx, disk.size))
3024 class LUClusterRename(LogicalUnit):
3025 """Rename the cluster.
3028 HPATH = "cluster-rename"
3029 HTYPE = constants.HTYPE_CLUSTER
3031 def BuildHooksEnv(self):
3036 "OP_TARGET": self.cfg.GetClusterName(),
3037 "NEW_NAME": self.op.name,
3040 def BuildHooksNodes(self):
3041 """Build hooks nodes.
3044 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3046 def CheckPrereq(self):
3047 """Verify that the passed name is a valid one.
3050 hostname = netutils.GetHostname(name=self.op.name,
3051 family=self.cfg.GetPrimaryIPFamily())
3053 new_name = hostname.name
3054 self.ip = new_ip = hostname.ip
3055 old_name = self.cfg.GetClusterName()
3056 old_ip = self.cfg.GetMasterIP()
3057 if new_name == old_name and new_ip == old_ip:
3058 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3059 " cluster has changed",
3061 if new_ip != old_ip:
3062 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3063 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3064 " reachable on the network" %
3065 new_ip, errors.ECODE_NOTUNIQUE)
3067 self.op.name = new_name
3069 def Exec(self, feedback_fn):
3070 """Rename the cluster.
3073 clustername = self.op.name
3076 # shutdown the master IP
3077 master = self.cfg.GetMasterNode()
3078 result = self.rpc.call_node_stop_master(master, False)
3079 result.Raise("Could not disable the master role")
3082 cluster = self.cfg.GetClusterInfo()
3083 cluster.cluster_name = clustername
3084 cluster.master_ip = ip
3085 self.cfg.Update(cluster, feedback_fn)
3087 # update the known hosts file
3088 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3089 node_list = self.cfg.GetOnlineNodeList()
3091 node_list.remove(master)
3094 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3096 result = self.rpc.call_node_start_master(master, False, False)
3097 msg = result.fail_msg
3099 self.LogWarning("Could not re-enable the master role on"
3100 " the master, please restart manually: %s", msg)
3105 class LUClusterSetParams(LogicalUnit):
3106 """Change the parameters of the cluster.
3109 HPATH = "cluster-modify"
3110 HTYPE = constants.HTYPE_CLUSTER
3113 def CheckArguments(self):
3117 if self.op.uid_pool:
3118 uidpool.CheckUidPool(self.op.uid_pool)
3120 if self.op.add_uids:
3121 uidpool.CheckUidPool(self.op.add_uids)
3123 if self.op.remove_uids:
3124 uidpool.CheckUidPool(self.op.remove_uids)
3126 def ExpandNames(self):
3127 # FIXME: in the future maybe other cluster params won't require checking on
3128 # all nodes to be modified.
3129 self.needed_locks = {
3130 locking.LEVEL_NODE: locking.ALL_SET,
3132 self.share_locks[locking.LEVEL_NODE] = 1
3134 def BuildHooksEnv(self):
3139 "OP_TARGET": self.cfg.GetClusterName(),
3140 "NEW_VG_NAME": self.op.vg_name,
3143 def BuildHooksNodes(self):
3144 """Build hooks nodes.
3147 mn = self.cfg.GetMasterNode()
3150 def CheckPrereq(self):
3151 """Check prerequisites.
3153 This checks that the given parameters do not conflict and
3154 that the given volume group is valid.
3157 if self.op.vg_name is not None and not self.op.vg_name:
3158 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3159 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3160 " instances exist", errors.ECODE_INVAL)
3162 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3163 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3164 raise errors.OpPrereqError("Cannot disable drbd helper while"
3165 " drbd-based instances exist",
3168 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3170 # if vg_name not None, checks given volume group on all nodes
3172 vglist = self.rpc.call_vg_list(node_list)
3173 for node in node_list:
3174 msg = vglist[node].fail_msg
3176 # ignoring down node
3177 self.LogWarning("Error while gathering data on node %s"
3178 " (ignoring node): %s", node, msg)
3180 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3182 constants.MIN_VG_SIZE)
3184 raise errors.OpPrereqError("Error on node '%s': %s" %
3185 (node, vgstatus), errors.ECODE_ENVIRON)
3187 if self.op.drbd_helper:
3188 # checks given drbd helper on all nodes
3189 helpers = self.rpc.call_drbd_helper(node_list)
3190 for node in node_list:
3191 ninfo = self.cfg.GetNodeInfo(node)
3193 self.LogInfo("Not checking drbd helper on offline node %s", node)
3195 msg = helpers[node].fail_msg
3197 raise errors.OpPrereqError("Error checking drbd helper on node"
3198 " '%s': %s" % (node, msg),
3199 errors.ECODE_ENVIRON)
3200 node_helper = helpers[node].payload
3201 if node_helper != self.op.drbd_helper:
3202 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3203 (node, node_helper), errors.ECODE_ENVIRON)
3205 self.cluster = cluster = self.cfg.GetClusterInfo()
3206 # validate params changes
3207 if self.op.beparams:
3208 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3209 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3211 if self.op.ndparams:
3212 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3213 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3215 # TODO: we need a more general way to handle resetting
3216 # cluster-level parameters to default values
3217 if self.new_ndparams["oob_program"] == "":
3218 self.new_ndparams["oob_program"] = \
3219 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3221 if self.op.nicparams:
3222 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3223 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3224 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3227 # check all instances for consistency
3228 for instance in self.cfg.GetAllInstancesInfo().values():
3229 for nic_idx, nic in enumerate(instance.nics):
3230 params_copy = copy.deepcopy(nic.nicparams)
3231 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3233 # check parameter syntax
3235 objects.NIC.CheckParameterSyntax(params_filled)
3236 except errors.ConfigurationError, err:
3237 nic_errors.append("Instance %s, nic/%d: %s" %
3238 (instance.name, nic_idx, err))
3240 # if we're moving instances to routed, check that they have an ip
3241 target_mode = params_filled[constants.NIC_MODE]
3242 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3243 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3244 " address" % (instance.name, nic_idx))
3246 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3247 "\n".join(nic_errors))
3249 # hypervisor list/parameters
3250 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3251 if self.op.hvparams:
3252 for hv_name, hv_dict in self.op.hvparams.items():
3253 if hv_name not in self.new_hvparams:
3254 self.new_hvparams[hv_name] = hv_dict
3256 self.new_hvparams[hv_name].update(hv_dict)
3258 # os hypervisor parameters
3259 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3261 for os_name, hvs in self.op.os_hvp.items():
3262 if os_name not in self.new_os_hvp:
3263 self.new_os_hvp[os_name] = hvs
3265 for hv_name, hv_dict in hvs.items():
3266 if hv_name not in self.new_os_hvp[os_name]:
3267 self.new_os_hvp[os_name][hv_name] = hv_dict
3269 self.new_os_hvp[os_name][hv_name].update(hv_dict)
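# Illustrative example (hypothetical values): with cluster.os_hvp containing
#   {"debian": {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}}
# and self.op.os_hvp set to
#   {"debian": {"xen-pvm": {"root_path": "/dev/xvda1"}}}
# the merged new_os_hvp keeps both parameters for the "debian"/"xen-pvm"
# pair, i.e. per-OS hypervisor overrides are merged key by key rather than
# replaced wholesale.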
3272 self.new_osp = objects.FillDict(cluster.osparams, {})
3273 if self.op.osparams:
3274 for os_name, osp in self.op.osparams.items():
3275 if os_name not in self.new_osp:
3276 self.new_osp[os_name] = {}
3278 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3281 if not self.new_osp[os_name]:
3282 # we removed all parameters
3283 del self.new_osp[os_name]
3285 # check the parameter validity (remote check)
3286 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3287 os_name, self.new_osp[os_name])
3289 # changes to the hypervisor list
3290 if self.op.enabled_hypervisors is not None:
3291 self.hv_list = self.op.enabled_hypervisors
3292 for hv in self.hv_list:
3293 # if the hypervisor doesn't already exist in the cluster
3294 # hvparams, we initialize it to empty, and then (in both
3295 # cases) we make sure to fill the defaults, as we might not
3296 # have a complete defaults list if the hypervisor wasn't
3298 if hv not in new_hvp:
3300 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3301 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3303 self.hv_list = cluster.enabled_hypervisors
3305 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3306 # either the enabled list has changed, or the parameters have, validate
3307 for hv_name, hv_params in self.new_hvparams.items():
3308 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3309 (self.op.enabled_hypervisors and
3310 hv_name in self.op.enabled_hypervisors)):
3311 # either this is a new hypervisor, or its parameters have changed
3312 hv_class = hypervisor.GetHypervisor(hv_name)
3313 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3314 hv_class.CheckParameterSyntax(hv_params)
3315 _CheckHVParams(self, node_list, hv_name, hv_params)
3318 # no need to check any newly-enabled hypervisors, since the
3319 # defaults have already been checked in the above code-block
3320 for os_name, os_hvp in self.new_os_hvp.items():
3321 for hv_name, hv_params in os_hvp.items():
3322 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3323 # we need to fill in the new os_hvp on top of the actual hv_p
3324 cluster_defaults = self.new_hvparams.get(hv_name, {})
3325 new_osp = objects.FillDict(cluster_defaults, hv_params)
3326 hv_class = hypervisor.GetHypervisor(hv_name)
3327 hv_class.CheckParameterSyntax(new_osp)
3328 _CheckHVParams(self, node_list, hv_name, new_osp)
3330 if self.op.default_iallocator:
3331 alloc_script = utils.FindFile(self.op.default_iallocator,
3332 constants.IALLOCATOR_SEARCH_PATH,
3334 if alloc_script is None:
3335 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3336 " specified" % self.op.default_iallocator,
3339 def Exec(self, feedback_fn):
3340 """Change the parameters of the cluster.
3343 if self.op.vg_name is not None:
3344 new_volume = self.op.vg_name
3347 if new_volume != self.cfg.GetVGName():
3348 self.cfg.SetVGName(new_volume)
3350 feedback_fn("Cluster LVM configuration already in desired"
3351 " state, not changing")
3352 if self.op.drbd_helper is not None:
3353 new_helper = self.op.drbd_helper
3356 if new_helper != self.cfg.GetDRBDHelper():
3357 self.cfg.SetDRBDHelper(new_helper)
3359 feedback_fn("Cluster DRBD helper already in desired state,"
3361 if self.op.hvparams:
3362 self.cluster.hvparams = self.new_hvparams
3364 self.cluster.os_hvp = self.new_os_hvp
3365 if self.op.enabled_hypervisors is not None:
3366 self.cluster.hvparams = self.new_hvparams
3367 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3368 if self.op.beparams:
3369 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3370 if self.op.nicparams:
3371 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3372 if self.op.osparams:
3373 self.cluster.osparams = self.new_osp
3374 if self.op.ndparams:
3375 self.cluster.ndparams = self.new_ndparams
3377 if self.op.candidate_pool_size is not None:
3378 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3379 # we need to update the pool size here, otherwise the save will fail
3380 _AdjustCandidatePool(self, [])
3382 if self.op.maintain_node_health is not None:
3383 self.cluster.maintain_node_health = self.op.maintain_node_health
3385 if self.op.prealloc_wipe_disks is not None:
3386 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3388 if self.op.add_uids is not None:
3389 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3391 if self.op.remove_uids is not None:
3392 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3394 if self.op.uid_pool is not None:
3395 self.cluster.uid_pool = self.op.uid_pool
3397 if self.op.default_iallocator is not None:
3398 self.cluster.default_iallocator = self.op.default_iallocator
3400 if self.op.reserved_lvs is not None:
3401 self.cluster.reserved_lvs = self.op.reserved_lvs
3403 def helper_os(aname, mods, desc):
3405 lst = getattr(self.cluster, aname)
3406 for key, val in mods:
3407 if key == constants.DDM_ADD:
3409 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3412 elif key == constants.DDM_REMOVE:
3416 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3418 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3420 if self.op.hidden_os:
3421 helper_os("hidden_os", self.op.hidden_os, "hidden")
3423 if self.op.blacklisted_os:
3424 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3426 if self.op.master_netdev:
3427 master = self.cfg.GetMasterNode()
3428 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3429 self.cluster.master_netdev)
3430 result = self.rpc.call_node_stop_master(master, False)
3431 result.Raise("Could not disable the master ip")
3432 feedback_fn("Changing master_netdev from %s to %s" %
3433 (self.cluster.master_netdev, self.op.master_netdev))
3434 self.cluster.master_netdev = self.op.master_netdev
3436 self.cfg.Update(self.cluster, feedback_fn)
3438 if self.op.master_netdev:
3439 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3440 self.op.master_netdev)
3441 result = self.rpc.call_node_start_master(master, False, False)
3443 self.LogWarning("Could not re-enable the master ip on"
3444 " the master, please restart manually: %s",
3448 def _UploadHelper(lu, nodes, fname):
3449 """Helper for uploading a file and showing warnings.
3452 if os.path.exists(fname):
3453 result = lu.rpc.call_upload_file(nodes, fname)
3454 for to_node, to_result in result.items():
3455 msg = to_result.fail_msg
3457 msg = ("Copy of file %s to node %s failed: %s" %
3458 (fname, to_node, msg))
3459 lu.proc.LogWarning(msg)
3462 def _ComputeAncillaryFiles(cluster, redist):
3463 """Compute files external to Ganeti which need to be consistent.
3465 @type redist: boolean
3466 @param redist: Whether to include files which need to be redistributed
3469 # Compute files for all nodes
3471 constants.SSH_KNOWN_HOSTS_FILE,
3472 constants.CONFD_HMAC_KEY,
3473 constants.CLUSTER_DOMAIN_SECRET_FILE,
3477 files_all.update(constants.ALL_CERT_FILES)
3478 files_all.update(ssconf.SimpleStore().GetFileList())
3480 if cluster.modify_etc_hosts:
3481 files_all.add(constants.ETC_HOSTS)
3483 # Files which must either exist on all nodes or on none
3484 files_all_opt = set([
3485 constants.RAPI_USERS_FILE,
3488 # Files which should only be on master candidates
3491 files_mc.add(constants.CLUSTER_CONF_FILE)
3493 # Files which should only be on VM-capable nodes
3494 files_vm = set(filename
3495 for hv_name in cluster.enabled_hypervisors
3496 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3498 # Filenames must be unique
3499 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3500 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3501 "Found file listed in more than one file list"
3503 return (files_all, files_all_opt, files_mc, files_vm)
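# A minimal sketch (mirroring the file2nodefn construction in _VerifyFiles,
# names illustrative only) of how the four sets returned above are turned
# into a per-file "which nodes should have it" predicate.
def _ExampleFileToNodePredicates(files_all, files_all_opt, files_mc, files_vm,
                                 master_name):
  """Maps each filename to a node filter callable, or None for "all nodes".

  """
  return dict((filename, fn)
              for (files, fn) in [(files_all, None),
                                  (files_all_opt, None),
                                  (files_mc,
                                   lambda node: (node.master_candidate or
                                                 node.name == master_name)),
                                  (files_vm, lambda node: node.vm_capable)]
              for filename in files)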
3506 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3507 """Distribute additional files which are part of the cluster configuration.
3509 ConfigWriter takes care of distributing the config and ssconf files, but
3510 there are more files which should be distributed to all nodes. This function
3511 makes sure those are copied.
3513 @param lu: calling logical unit
3514 @param additional_nodes: list of nodes not in the config to distribute to
3515 @type additional_vm: boolean
3516 @param additional_vm: whether the additional nodes are vm-capable or not
3519 # Gather target nodes
3520 cluster = lu.cfg.GetClusterInfo()
3521 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3523 online_nodes = lu.cfg.GetOnlineNodeList()
3524 vm_nodes = lu.cfg.GetVmCapableNodeList()
3526 if additional_nodes is not None:
3527 online_nodes.extend(additional_nodes)
3529 vm_nodes.extend(additional_nodes)
3531 # Never distribute to master node
3532 for nodelist in [online_nodes, vm_nodes]:
3533 if master_info.name in nodelist:
3534 nodelist.remove(master_info.name)
3537 (files_all, files_all_opt, files_mc, files_vm) = \
3538 _ComputeAncillaryFiles(cluster, True)
3540 # Never re-distribute configuration file from here
3541 assert not (constants.CLUSTER_CONF_FILE in files_all or
3542 constants.CLUSTER_CONF_FILE in files_vm)
3543 assert not files_mc, "Master candidates not handled in this function"
3546 (online_nodes, files_all),
3547 (online_nodes, files_all_opt),
3548 (vm_nodes, files_vm),
3552 for (node_list, files) in filemap:
3554 _UploadHelper(lu, node_list, fname)
3557 class LUClusterRedistConf(NoHooksLU):
3558 """Force the redistribution of cluster configuration.
3560 This is a very simple LU.
3565 def ExpandNames(self):
3566 self.needed_locks = {
3567 locking.LEVEL_NODE: locking.ALL_SET,
3569 self.share_locks[locking.LEVEL_NODE] = 1
3571 def Exec(self, feedback_fn):
3572 """Redistribute the configuration.
3575 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3576 _RedistributeAncillaryFiles(self)
3579 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3580 """Sleep and poll for an instance's disk to sync.
3583 if not instance.disks or disks is not None and not disks:
3586 disks = _ExpandCheckDisks(instance, disks)
3589 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3591 node = instance.primary_node
3594 lu.cfg.SetDiskID(dev, node)
3596 # TODO: Convert to utils.Retry
3599 degr_retries = 10 # in seconds, as we sleep 1 second each time
3603 cumul_degraded = False
3604 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3605 msg = rstats.fail_msg
3607 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3610 raise errors.RemoteError("Can't contact node %s for mirror data,"
3611 " aborting." % node)
3614 rstats = rstats.payload
3616 for i, mstat in enumerate(rstats):
3618 lu.LogWarning("Can't compute data for node %s/%s",
3619 node, disks[i].iv_name)
3622 cumul_degraded = (cumul_degraded or
3623 (mstat.is_degraded and mstat.sync_percent is None))
3624 if mstat.sync_percent is not None:
3626 if mstat.estimated_time is not None:
3627 rem_time = ("%s remaining (estimated)" %
3628 utils.FormatSeconds(mstat.estimated_time))
3629 max_time = mstat.estimated_time
3631 rem_time = "no time estimate"
3632 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3633 (disks[i].iv_name, mstat.sync_percent, rem_time))
3635 # if we're done but degraded, let's do a few small retries, to
3636 # make sure we see a stable and not transient situation; therefore
3637 # we force restart of the loop
3638 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3639 logging.info("Degraded disks found, %d retries left", degr_retries)
3647 time.sleep(min(60, max_time))
3650 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3651 return not cumul_degraded
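# A simplified, standalone sketch of the sleep decision used by _WaitForSync
# above (an assumption for illustration: it takes the largest estimate
# instead of the last one seen, and never sleeps longer than a minute).
def _ExampleSyncSleepTime(sync_estimates):
  """Returns the poll interval given per-disk estimated_time values (sketch).

  @param sync_estimates: iterable of estimated seconds remaining, possibly
      containing None for disks without an estimate

  """
  max_time = 0
  for estimated in sync_estimates:
    if estimated is not None:
      max_time = max(max_time, estimated)
  return min(60, max_time)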
3654 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3655 """Check that mirrors are not degraded.
3657 The ldisk parameter, if True, will change the test from the
3658 is_degraded attribute (which represents overall non-ok status for
3659 the device(s)) to the ldisk (representing the local storage status).
3662 lu.cfg.SetDiskID(dev, node)
3666 if on_primary or dev.AssembleOnSecondary():
3667 rstats = lu.rpc.call_blockdev_find(node, dev)
3668 msg = rstats.fail_msg
3670 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3672 elif not rstats.payload:
3673 lu.LogWarning("Can't find disk on node %s", node)
3677 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3679 result = result and not rstats.payload.is_degraded
3682 for child in dev.children:
3683 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
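# A self-contained sketch (using a stand-in class, not ganeti.objects.Disk) of
# the recursive walk done by _CheckDiskConsistency above: the overall result
# is simply the logical AND of the per-device status over the disk tree.
class _ExampleDisk(object):
  """Stand-in disk node for illustration only."""
  def __init__(self, healthy, children=None):
    self.healthy = healthy
    self.children = children or []


def _ExampleTreeConsistent(disk):
  """Returns True only if the disk and all its descendants are healthy."""
  result = disk.healthy
  for child in disk.children:
    result = result and _ExampleTreeConsistent(child)
  return result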
3688 class LUOobCommand(NoHooksLU):
3689 """Logical unit for OOB handling.
3693 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3695 def ExpandNames(self):
3696 """Gather locks we need.
3699 if self.op.node_names:
3700 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3701 lock_names = self.op.node_names
3703 lock_names = locking.ALL_SET
3705 self.needed_locks = {
3706 locking.LEVEL_NODE: lock_names,
3709 def CheckPrereq(self):
3710 """Check prerequisites.
3713 - the node exists in the configuration
3716 Any errors are signaled by raising errors.OpPrereqError.
3720 self.master_node = self.cfg.GetMasterNode()
3722 assert self.op.power_delay >= 0.0
3724 if self.op.node_names:
3725 if (self.op.command in self._SKIP_MASTER and
3726 self.master_node in self.op.node_names):
3727 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3728 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3730 if master_oob_handler:
3731 additional_text = ("run '%s %s %s' if you want to operate on the"
3732 " master regardless") % (master_oob_handler,
3736 additional_text = "it does not support out-of-band operations"
3738 raise errors.OpPrereqError(("Operating on the master node %s is not"
3739 " allowed for %s; %s") %
3740 (self.master_node, self.op.command,
3741 additional_text), errors.ECODE_INVAL)
else:
3743 self.op.node_names = self.cfg.GetNodeList()
3744 if self.op.command in self._SKIP_MASTER:
3745 self.op.node_names.remove(self.master_node)
3747 if self.op.command in self._SKIP_MASTER:
3748 assert self.master_node not in self.op.node_names
3750 for node_name in self.op.node_names:
3751 node = self.cfg.GetNodeInfo(node_name)
3754 raise errors.OpPrereqError("Node %s not found" % node_name,
3757 self.nodes.append(node)
3759 if (not self.op.ignore_status and
3760 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3761 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3762 " not marked offline") % node_name,
3765 def Exec(self, feedback_fn):
3766 """Execute OOB and return result if we expect any.
3769 master_node = self.master_node
3772 for idx, node in enumerate(utils.NiceSort(self.nodes,
3773 key=lambda node: node.name)):
3774 node_entry = [(constants.RS_NORMAL, node.name)]
3775 ret.append(node_entry)
3777 oob_program = _SupportsOob(self.cfg, node)
if not oob_program:
3780 node_entry.append((constants.RS_UNAVAIL, None))
continue
3783 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3784 self.op.command, oob_program, node.name)
3785 result = self.rpc.call_run_oob(master_node, oob_program,
3786 self.op.command, node.name,
3790 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3791 node.name, result.fail_msg)
3792 node_entry.append((constants.RS_NODATA, None))
3795 self._CheckPayload(result)
3796 except errors.OpExecError, err:
3797 self.LogWarning("Payload returned by node '%s' is not valid: %s",
node.name, err)
3799 node_entry.append((constants.RS_NODATA, None))
3801 if self.op.command == constants.OOB_HEALTH:
3802 # For health we should log important events
3803 for item, status in result.payload:
3804 if status in [constants.OOB_STATUS_WARNING,
3805 constants.OOB_STATUS_CRITICAL]:
3806 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3807 item, node.name, status)
3809 if self.op.command == constants.OOB_POWER_ON:
node.powered = True
3811 elif self.op.command == constants.OOB_POWER_OFF:
3812 node.powered = False
3813 elif self.op.command == constants.OOB_POWER_STATUS:
3814 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3815 if powered != node.powered:
3816 logging.warning(("Recorded power state (%s) of node '%s' does not"
3817 " match actual power state (%s)"), node.powered,
3820 # For configuration changing commands we should update the node
3821 if self.op.command in (constants.OOB_POWER_ON,
3822 constants.OOB_POWER_OFF):
3823 self.cfg.Update(node, feedback_fn)
3825 node_entry.append((constants.RS_NORMAL, result.payload))
3827 if (self.op.command == constants.OOB_POWER_ON and
3828 idx < len(self.nodes) - 1):
3829 time.sleep(self.op.power_delay)
3833 def _CheckPayload(self, result):
3834 """Checks if the payload is valid.
3836 @param result: RPC result
3837 @raises errors.OpExecError: If payload is not valid
errs = []
3841 if self.op.command == constants.OOB_HEALTH:
3842 if not isinstance(result.payload, list):
3843 errs.append("command 'health' is expected to return a list but got %s" %
3844 type(result.payload))
else:
3846 for item, status in result.payload:
3847 if status not in constants.OOB_STATUSES:
3848 errs.append("health item '%s' has invalid status '%s'" %
(item, status))
3851 if self.op.command == constants.OOB_POWER_STATUS:
3852 if not isinstance(result.payload, dict):
3853 errs.append("power-status is expected to return a dict but got %s" %
3854 type(result.payload))
3856 if self.op.command in [
3857 constants.OOB_POWER_ON,
3858 constants.OOB_POWER_OFF,
3859 constants.OOB_POWER_CYCLE,
3861 if result.payload is not None:
3862 errs.append("%s is expected to not return payload but got '%s'" %
3863 (self.op.command, result.payload))
if errs:
3866 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3867 utils.CommaJoin(errs))
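# Note added for clarity (not part of the original file): LUOobCommand.Exec
# returns one entry per node, in name order; each entry is a list of
# (status, data) tuples whose status is constants.RS_NORMAL,
# constants.RS_NODATA or constants.RS_UNAVAIL.  An illustrative result for a
# "power-status" run against two nodes could look like (hypothetical
# values):
#
#   [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, {"powered": True})],
#    [(constants.RS_NORMAL, "node2"), (constants.RS_NODATA, None)]]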
3869 class _OsQuery(_QueryBase):
3870 FIELDS = query.OS_FIELDS
3872 def ExpandNames(self, lu):
3873 # Lock all nodes in shared mode
3874 # Temporary removal of locks, should be reverted later
3875 # TODO: reintroduce locks when they are lighter-weight
3876 lu.needed_locks = {}
3877 #self.share_locks[locking.LEVEL_NODE] = 1
3878 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3880 # The following variables interact with _QueryBase._GetNames
3882 self.wanted = self.names
3884 self.wanted = locking.ALL_SET
3886 self.do_locking = self.use_locking
3888 def DeclareLocks(self, lu, level):
3892 def _DiagnoseByOS(rlist):
3893 """Remaps a per-node return list into an a per-os per-node dictionary
3895 @param rlist: a map with node names as keys and OS objects as values
3898 @return: a dictionary with osnames as keys and as value another
3899 map, with nodes as keys and tuples of (path, status, diagnose,
3900 variants, parameters, api_versions) as values, eg::
3902 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3903 (/srv/..., False, "invalid api")],
3904 "node2": [(/srv/..., True, "", [], [])]}
all_os = {}
3909 # we build here the list of nodes that didn't fail the RPC (at RPC
3910 # level), so that nodes with a non-responding node daemon don't
3911 # make all OSes invalid
3912 good_nodes = [node_name for node_name in rlist
3913 if not rlist[node_name].fail_msg]
3914 for node_name, nr in rlist.items():
3915 if nr.fail_msg or not nr.payload:
continue
3917 for (name, path, status, diagnose, variants,
3918 params, api_versions) in nr.payload:
3919 if name not in all_os:
3920 # build a list of nodes for this os containing empty lists
3921 # for each node in node_list
all_os[name] = {}
3923 for nname in good_nodes:
3924 all_os[name][nname] = []
3925 # convert params from [name, help] to (name, help)
3926 params = [tuple(v) for v in params]
3927 all_os[name][node_name].append((path, status, diagnose,
3928 variants, params, api_versions))
return all_os
3931 def _GetQueryData(self, lu):
3932 """Computes the list of nodes and their attributes.
3935 # Locking is not used
3936 assert not (compat.any(lu.glm.is_owned(level)
3937 for level in locking.LEVELS
3938 if level != locking.LEVEL_CLUSTER) or
3939 self.do_locking or self.use_locking)
3941 valid_nodes = [node.name
3942 for node in lu.cfg.GetAllNodesInfo().values()
3943 if not node.offline and node.vm_capable]
3944 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3945 cluster = lu.cfg.GetClusterInfo()
data = {}
3949 for (os_name, os_data) in pol.items():
3950 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3951 hidden=(os_name in cluster.hidden_os),
3952 blacklisted=(os_name in cluster.blacklisted_os))
variants = set()
parameters = set()
3956 api_versions = set()
3958 for idx, osl in enumerate(os_data.values()):
3959 info.valid = bool(info.valid and osl and osl[0][1])
if not info.valid:
break
3963 (node_variants, node_params, node_api) = osl[0][3:6]
if idx == 0:
# first entry
3966 variants.update(node_variants)
3967 parameters.update(node_params)
3968 api_versions.update(node_api)
else:
3970 # Filter out inconsistent values
3971 variants.intersection_update(node_variants)
3972 parameters.intersection_update(node_params)
3973 api_versions.intersection_update(node_api)
3975 info.variants = list(variants)
3976 info.parameters = list(parameters)
3977 info.api_versions = list(api_versions)
3979 data[os_name] = info
3981 # Prepare data in requested order
3982 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
if name in data]
3986 class LUOsDiagnose(NoHooksLU):
3987 """Logical unit for OS diagnose/query.
3993 def _BuildFilter(fields, names):
3994 """Builds a filter for querying OSes.
3997 name_filter = qlang.MakeSimpleFilter("name", names)
3999 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4000 # respective field is not requested
4001 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4002 for fname in ["hidden", "blacklisted"]
4003 if fname not in fields]
4004 if "valid" not in fields:
4005 status_filter.append([qlang.OP_TRUE, "valid"])
if status_filter:
4008 status_filter.insert(0, qlang.OP_AND)
else:
4010 status_filter = None
4012 if name_filter and status_filter:
4013 return [qlang.OP_AND, name_filter, status_filter]
elif name_filter:
return name_filter
else:
4017 return status_filter
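# Example added for illustration (derived from the logic above): when none
# of "hidden", "blacklisted" or "valid" is among the requested fields, the
# status filter has the shape
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# and, when names were requested as well, it is combined with the name
# filter in an outer [qlang.OP_AND, name_filter, status_filter].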
4019 def CheckArguments(self):
4020 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4021 self.op.output_fields, False)
4023 def ExpandNames(self):
4024 self.oq.ExpandNames(self)
4026 def Exec(self, feedback_fn):
4027 return self.oq.OldStyleQuery(self)
4030 class LUNodeRemove(LogicalUnit):
4031 """Logical unit for removing a node.
4034 HPATH = "node-remove"
4035 HTYPE = constants.HTYPE_NODE
4037 def BuildHooksEnv(self):
4040 This doesn't run on the target node in the pre phase as a failed
4041 node would then be impossible to remove.
4045 "OP_TARGET": self.op.node_name,
4046 "NODE_NAME": self.op.node_name,
4049 def BuildHooksNodes(self):
4050 """Build hooks nodes.
4053 all_nodes = self.cfg.GetNodeList()
try:
4055 all_nodes.remove(self.op.node_name)
except ValueError:
4057 logging.warning("Node '%s', which is about to be removed, was not found"
4058 " in the list of all nodes", self.op.node_name)
4059 return (all_nodes, all_nodes)
4061 def CheckPrereq(self):
4062 """Check prerequisites.
4065 - the node exists in the configuration
4066 - it does not have primary or secondary instances
4067 - it's not the master
4069 Any errors are signaled by raising errors.OpPrereqError.
4072 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4073 node = self.cfg.GetNodeInfo(self.op.node_name)
4074 assert node is not None
4076 instance_list = self.cfg.GetInstanceList()
4078 masternode = self.cfg.GetMasterNode()
4079 if node.name == masternode:
4080 raise errors.OpPrereqError("Node is the master node, failover to another"
4081 " node is required", errors.ECODE_INVAL)
4083 for instance_name in instance_list:
4084 instance = self.cfg.GetInstanceInfo(instance_name)
4085 if node.name in instance.all_nodes:
4086 raise errors.OpPrereqError("Instance %s is still running on the node,"
4087 " please remove first" % instance_name,
4089 self.op.node_name = node.name
4092 def Exec(self, feedback_fn):
4093 """Removes the node from the cluster.
4097 logging.info("Stopping the node daemon and removing configs from node %s",
4100 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4102 # Promote nodes to master candidate as needed
4103 _AdjustCandidatePool(self, exceptions=[node.name])
4104 self.context.RemoveNode(node.name)
4106 # Run post hooks on the node before it's removed
4107 _RunPostHook(self, node.name)
4109 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4110 msg = result.fail_msg
4112 self.LogWarning("Errors encountered on the remote node while leaving"
4113 " the cluster: %s", msg)
4115 # Remove node from our /etc/hosts
4116 if self.cfg.GetClusterInfo().modify_etc_hosts:
4117 master_node = self.cfg.GetMasterNode()
4118 result = self.rpc.call_etc_hosts_modify(master_node,
4119 constants.ETC_HOSTS_REMOVE,
4121 result.Raise("Can't update hosts file with new host data")
4122 _RedistributeAncillaryFiles(self)
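# Note added for clarity (not part of the original file): the removal steps
# above are deliberately ordered so the cluster stays consistent even if the
# node is already dead: the candidate pool is adjusted and the node dropped
# from the configuration and context first, the post hooks and the
# node-side cleanup RPC run afterwards (a failure there is only logged as a
# warning), and /etc/hosts plus the ancillary files are refreshed last.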
4125 class _NodeQuery(_QueryBase):
4126 FIELDS = query.NODE_FIELDS
4128 def ExpandNames(self, lu):
4129 lu.needed_locks = {}
4130 lu.share_locks[locking.LEVEL_NODE] = 1
4133 self.wanted = _GetWantedNodes(lu, self.names)
4135 self.wanted = locking.ALL_SET
4137 self.do_locking = (self.use_locking and
4138 query.NQ_LIVE in self.requested_data)
4141 # if we don't request only static fields, we need to lock the nodes
4142 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4144 def DeclareLocks(self, lu, level):
4147 def _GetQueryData(self, lu):
4148 """Computes the list of nodes and their attributes.
4151 all_info = lu.cfg.GetAllNodesInfo()
4153 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4155 # Gather data as requested
4156 if query.NQ_LIVE in self.requested_data:
4157 # filter out non-vm_capable nodes
4158 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4160 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4161 lu.cfg.GetHypervisorType())
4162 live_data = dict((name, nresult.payload)
4163 for (name, nresult) in node_data.items()
4164 if not nresult.fail_msg and nresult.payload)
4168 if query.NQ_INST in self.requested_data:
4169 node_to_primary = dict([(name, set()) for name in nodenames])
4170 node_to_secondary = dict([(name, set()) for name in nodenames])
4172 inst_data = lu.cfg.GetAllInstancesInfo()
4174 for inst in inst_data.values():
4175 if inst.primary_node in node_to_primary:
4176 node_to_primary[inst.primary_node].add(inst.name)
4177 for secnode in inst.secondary_nodes:
4178 if secnode in node_to_secondary:
4179 node_to_secondary[secnode].add(inst.name)
4181 node_to_primary = None
4182 node_to_secondary = None
4184 if query.NQ_OOB in self.requested_data:
4185 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4186 for name, node in all_info.iteritems())
4190 if query.NQ_GROUP in self.requested_data:
4191 groups = lu.cfg.GetAllNodeGroupsInfo()
4195 return query.NodeQueryData([all_info[name] for name in nodenames],
4196 live_data, lu.cfg.GetMasterNode(),
4197 node_to_primary, node_to_secondary, groups,
4198 oob_support, lu.cfg.GetClusterInfo())
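# Note added for clarity (not part of the original file): the amount of work
# done above is driven by the requested_data set: query.NQ_LIVE triggers the
# node_info RPC against vm_capable nodes only, query.NQ_INST builds the
# primary/secondary instance maps from the configuration, query.NQ_OOB
# precomputes out-of-band support per node and query.NQ_GROUP fetches the
# node group objects.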
4201 class LUNodeQuery(NoHooksLU):
4202 """Logical unit for querying nodes.
4205 # pylint: disable-msg=W0142
4208 def CheckArguments(self):
4209 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4210 self.op.output_fields, self.op.use_locking)
4212 def ExpandNames(self):
4213 self.nq.ExpandNames(self)
4215 def Exec(self, feedback_fn):
4216 return self.nq.OldStyleQuery(self)
4219 class LUNodeQueryvols(NoHooksLU):
4220 """Logical unit for getting volumes on node(s).
4224 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4225 _FIELDS_STATIC = utils.FieldSet("node")
4227 def CheckArguments(self):
4228 _CheckOutputFields(static=self._FIELDS_STATIC,
4229 dynamic=self._FIELDS_DYNAMIC,
4230 selected=self.op.output_fields)
4232 def ExpandNames(self):
4233 self.needed_locks = {}
4234 self.share_locks[locking.LEVEL_NODE] = 1
4235 if not self.op.nodes:
4236 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4238 self.needed_locks[locking.LEVEL_NODE] = \
4239 _GetWantedNodes(self, self.op.nodes)
4241 def Exec(self, feedback_fn):
4242 """Computes the list of nodes and their attributes.
4245 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4246 volumes = self.rpc.call_node_volumes(nodenames)
4248 ilist = [self.cfg.GetInstanceInfo(iname) for iname
4249 in self.cfg.GetInstanceList()]
4251 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
output = []
4254 for node in nodenames:
4255 nresult = volumes[node]
4258 msg = nresult.fail_msg
4260 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4263 node_vols = nresult.payload[:]
4264 node_vols.sort(key=lambda vol: vol['dev'])
4266 for vol in node_vols:
node_output = []
4268 for field in self.op.output_fields:
4271 elif field == "phys":
4275 elif field == "name":
4277 elif field == "size":
4278 val = int(float(vol['size']))
4279 elif field == "instance":
4281 if node not in lv_by_node[inst]:
4283 if vol['name'] in lv_by_node[inst][node]:
4289 raise errors.ParameterError(field)
4290 node_output.append(str(val))
4292 output.append(node_output)
return output
4297 class LUNodeQueryStorage(NoHooksLU):
4298 """Logical unit for getting information on storage units on node(s).
4301 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4304 def CheckArguments(self):
4305 _CheckOutputFields(static=self._FIELDS_STATIC,
4306 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4307 selected=self.op.output_fields)
4309 def ExpandNames(self):
4310 self.needed_locks = {}
4311 self.share_locks[locking.LEVEL_NODE] = 1
4314 self.needed_locks[locking.LEVEL_NODE] = \
4315 _GetWantedNodes(self, self.op.nodes)
4317 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4319 def Exec(self, feedback_fn):
4320 """Computes the list of nodes and their attributes.
4323 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4325 # Always get name to sort by
4326 if constants.SF_NAME in self.op.output_fields:
4327 fields = self.op.output_fields[:]
4329 fields = [constants.SF_NAME] + self.op.output_fields
4331 # Never ask for node or type as it's only known to the LU
4332 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4333 while extra in fields:
4334 fields.remove(extra)
4336 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4337 name_idx = field_idx[constants.SF_NAME]
4339 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4340 data = self.rpc.call_storage_list(self.nodes,
4341 self.op.storage_type, st_args,
4342 self.op.name, fields)
4346 for node in utils.NiceSort(self.nodes):
4347 nresult = data[node]
4351 msg = nresult.fail_msg
4353 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4356 rows = dict([(row[name_idx], row) for row in nresult.payload])
4358 for name in utils.NiceSort(rows.keys()):
4363 for field in self.op.output_fields:
4364 if field == constants.SF_NODE:
4366 elif field == constants.SF_TYPE:
4367 val = self.op.storage_type
4368 elif field in field_idx:
4369 val = row[field_idx[field]]
4371 raise errors.ParameterError(field)
4380 class _InstanceQuery(_QueryBase):
4381 FIELDS = query.INSTANCE_FIELDS
4383 def ExpandNames(self, lu):
4384 lu.needed_locks = {}
4385 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4386 lu.share_locks[locking.LEVEL_NODE] = 1
4389 self.wanted = _GetWantedInstances(lu, self.names)
4391 self.wanted = locking.ALL_SET
4393 self.do_locking = (self.use_locking and
4394 query.IQ_LIVE in self.requested_data)
4396 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4397 lu.needed_locks[locking.LEVEL_NODE] = []
4398 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4400 def DeclareLocks(self, lu, level):
4401 if level == locking.LEVEL_NODE and self.do_locking:
4402 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4404 def _GetQueryData(self, lu):
4405 """Computes the list of instances and their attributes.
4408 cluster = lu.cfg.GetClusterInfo()
4409 all_info = lu.cfg.GetAllInstancesInfo()
4411 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4413 instance_list = [all_info[name] for name in instance_names]
4414 nodes = frozenset(itertools.chain(*(inst.all_nodes
4415 for inst in instance_list)))
4416 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4419 wrongnode_inst = set()
4421 # Gather data as requested
4422 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4424 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4426 result = node_data[name]
4428 # offline nodes will be in both lists
4429 assert result.fail_msg
4430 offline_nodes.append(name)
4432 bad_nodes.append(name)
4433 elif result.payload:
4434 for inst in result.payload:
4435 if inst in all_info:
4436 if all_info[inst].primary_node == name:
4437 live_data.update(result.payload)
4439 wrongnode_inst.add(inst)
4441 # orphan instance; we don't list it here as we don't
4442 # handle this case yet in the output of instance listing
4443 logging.warning("Orphan instance '%s' found on node %s",
4445 # else no instance is alive
4449 if query.IQ_DISKUSAGE in self.requested_data:
4450 disk_usage = dict((inst.name,
4451 _ComputeDiskSize(inst.disk_template,
4452 [{constants.IDISK_SIZE: disk.size}
4453 for disk in inst.disks]))
4454 for inst in instance_list)
4458 if query.IQ_CONSOLE in self.requested_data:
4460 for inst in instance_list:
4461 if inst.name in live_data:
4462 # Instance is running
4463 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4465 consinfo[inst.name] = None
4466 assert set(consinfo.keys()) == set(instance_names)
4470 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4471 disk_usage, offline_nodes, bad_nodes,
4472 live_data, wrongnode_inst, consinfo)
4475 class LUQuery(NoHooksLU):
4476 """Query for resources/items of a certain kind.
4479 # pylint: disable-msg=W0142
4482 def CheckArguments(self):
4483 qcls = _GetQueryImplementation(self.op.what)
4485 self.impl = qcls(self.op.filter, self.op.fields, False)
4487 def ExpandNames(self):
4488 self.impl.ExpandNames(self)
4490 def DeclareLocks(self, level):
4491 self.impl.DeclareLocks(self, level)
4493 def Exec(self, feedback_fn):
4494 return self.impl.NewStyleQuery(self)
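# Illustrative example (not from the original file): the filter accepted by
# LUQuery is a nested list in qlang syntax; selecting two nodes by name
# could look like
#
#   [qlang.OP_OR, [qlang.OP_EQUAL, "name", "node1.example.com"],
#                 [qlang.OP_EQUAL, "name", "node2.example.com"]]
#
# which is also the shape produced by qlang.MakeSimpleFilter("name", names)
# used elsewhere in this module; the hostnames above are hypothetical.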
4497 class LUQueryFields(NoHooksLU):
4498 """Query for resources/items of a certain kind.
4501 # pylint: disable-msg=W0142
4504 def CheckArguments(self):
4505 self.qcls = _GetQueryImplementation(self.op.what)
4507 def ExpandNames(self):
4508 self.needed_locks = {}
4510 def Exec(self, feedback_fn):
4511 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4514 class LUNodeModifyStorage(NoHooksLU):
4515 """Logical unit for modifying a storage volume on a node.
4520 def CheckArguments(self):
4521 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4523 storage_type = self.op.storage_type
4526 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4528 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4529 " modified" % storage_type,
4532 diff = set(self.op.changes.keys()) - modifiable
4534 raise errors.OpPrereqError("The following fields can not be modified for"
4535 " storage units of type '%s': %r" %
4536 (storage_type, list(diff)),
4539 def ExpandNames(self):
4540 self.needed_locks = {
4541 locking.LEVEL_NODE: self.op.node_name,
4544 def Exec(self, feedback_fn):
4545 """Computes the list of nodes and their attributes.
4548 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4549 result = self.rpc.call_storage_modify(self.op.node_name,
4550 self.op.storage_type, st_args,
4551 self.op.name, self.op.changes)
4552 result.Raise("Failed to modify storage unit '%s' on %s" %
4553 (self.op.name, self.op.node_name))
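# Illustrative example (hypothetical values, not from the original file):
# for an LVM physical volume the modifiable field set typically only
# contains allocatability, so an opcode for this LU would carry something
# like storage_type=constants.ST_LVM_PV and
# changes={constants.SF_ALLOCATABLE: False}; any key outside
# constants.MODIFIABLE_STORAGE_FIELDS[storage_type] is rejected by
# CheckArguments above.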
4556 class LUNodeAdd(LogicalUnit):
4557 """Logical unit for adding node to the cluster.
4561 HTYPE = constants.HTYPE_NODE
4562 _NFLAGS = ["master_capable", "vm_capable"]
4564 def CheckArguments(self):
4565 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4566 # validate/normalize the node name
4567 self.hostname = netutils.GetHostname(name=self.op.node_name,
4568 family=self.primary_ip_family)
4569 self.op.node_name = self.hostname.name
4571 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4572 raise errors.OpPrereqError("Cannot readd the master node",
4575 if self.op.readd and self.op.group:
4576 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4577 " being readded", errors.ECODE_INVAL)
4579 def BuildHooksEnv(self):
4582 This will run on all nodes before, and on all nodes + the new node after.
4586 "OP_TARGET": self.op.node_name,
4587 "NODE_NAME": self.op.node_name,
4588 "NODE_PIP": self.op.primary_ip,
4589 "NODE_SIP": self.op.secondary_ip,
4590 "MASTER_CAPABLE": str(self.op.master_capable),
4591 "VM_CAPABLE": str(self.op.vm_capable),
4594 def BuildHooksNodes(self):
4595 """Build hooks nodes.
4598 # Exclude added node
4599 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4600 post_nodes = pre_nodes + [self.op.node_name, ]
4602 return (pre_nodes, post_nodes)
4604 def CheckPrereq(self):
4605 """Check prerequisites.
4608 - the new node is not already in the config
4610 - its parameters (single/dual homed) match the cluster
4612 Any errors are signaled by raising errors.OpPrereqError.
cfg = self.cfg
4616 hostname = self.hostname
4617 node = hostname.name
4618 primary_ip = self.op.primary_ip = hostname.ip
4619 if self.op.secondary_ip is None:
4620 if self.primary_ip_family == netutils.IP6Address.family:
4621 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4622 " IPv4 address must be given as secondary",
4624 self.op.secondary_ip = primary_ip
4626 secondary_ip = self.op.secondary_ip
4627 if not netutils.IP4Address.IsValid(secondary_ip):
4628 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4629 " address" % secondary_ip, errors.ECODE_INVAL)
4631 node_list = cfg.GetNodeList()
4632 if not self.op.readd and node in node_list:
4633 raise errors.OpPrereqError("Node %s is already in the configuration" %
4634 node, errors.ECODE_EXISTS)
4635 elif self.op.readd and node not in node_list:
4636 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4639 self.changed_primary_ip = False
4641 for existing_node_name in node_list:
4642 existing_node = cfg.GetNodeInfo(existing_node_name)
4644 if self.op.readd and node == existing_node_name:
4645 if existing_node.secondary_ip != secondary_ip:
4646 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4647 " address configuration as before",
4649 if existing_node.primary_ip != primary_ip:
4650 self.changed_primary_ip = True
4654 if (existing_node.primary_ip == primary_ip or
4655 existing_node.secondary_ip == primary_ip or
4656 existing_node.primary_ip == secondary_ip or
4657 existing_node.secondary_ip == secondary_ip):
4658 raise errors.OpPrereqError("New node ip address(es) conflict with"
4659 " existing node %s" % existing_node.name,
4660 errors.ECODE_NOTUNIQUE)
4662 # After this 'if' block, None is no longer a valid value for the
4663 # _capable op attributes
4665 old_node = self.cfg.GetNodeInfo(node)
4666 assert old_node is not None, "Can't retrieve locked node %s" % node
4667 for attr in self._NFLAGS:
4668 if getattr(self.op, attr) is None:
4669 setattr(self.op, attr, getattr(old_node, attr))
4671 for attr in self._NFLAGS:
4672 if getattr(self.op, attr) is None:
4673 setattr(self.op, attr, True)
4675 if self.op.readd and not self.op.vm_capable:
4676 pri, sec = cfg.GetNodeInstances(node)
4678 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4679 " flag set to false, but it already holds"
4680 " instances" % node,
4683 # check that the type of the node (single versus dual homed) is the
4684 # same as for the master
4685 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4686 master_singlehomed = myself.secondary_ip == myself.primary_ip
4687 newbie_singlehomed = secondary_ip == primary_ip
4688 if master_singlehomed != newbie_singlehomed:
4689 if master_singlehomed:
4690 raise errors.OpPrereqError("The master has no secondary ip but the"
4691 " new node has one",
4694 raise errors.OpPrereqError("The master has a secondary ip but the"
4695 " new node doesn't have one",
4698 # checks reachability
4699 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4700 raise errors.OpPrereqError("Node not reachable by ping",
4701 errors.ECODE_ENVIRON)
4703 if not newbie_singlehomed:
4704 # check reachability from my secondary ip to newbie's secondary ip
4705 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4706 source=myself.secondary_ip):
4707 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4708 " based ping to node daemon port",
4709 errors.ECODE_ENVIRON)
4716 if self.op.master_capable:
4717 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4719 self.master_candidate = False
4722 self.new_node = old_node
4724 node_group = cfg.LookupNodeGroup(self.op.group)
4725 self.new_node = objects.Node(name=node,
4726 primary_ip=primary_ip,
4727 secondary_ip=secondary_ip,
4728 master_candidate=self.master_candidate,
4729 offline=False, drained=False,
4732 if self.op.ndparams:
4733 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4735 def Exec(self, feedback_fn):
4736 """Adds the new node to the cluster.
4739 new_node = self.new_node
4740 node = new_node.name
4742 # We are adding a new node, so we assume it's powered
4743 new_node.powered = True
4745 # for re-adds, reset the offline/drained/master-candidate flags;
4746 # we need to reset here, otherwise offline would prevent RPC calls
4747 # later in the procedure; this also means that if the re-add
4748 # fails, we are left with a non-offlined, broken node
4750 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4751 self.LogInfo("Readding a node, the offline/drained flags were reset")
4752 # if we demote the node, we do cleanup later in the procedure
4753 new_node.master_candidate = self.master_candidate
4754 if self.changed_primary_ip:
4755 new_node.primary_ip = self.op.primary_ip
4757 # copy the master/vm_capable flags
4758 for attr in self._NFLAGS:
4759 setattr(new_node, attr, getattr(self.op, attr))
4761 # notify the user about any possible mc promotion
4762 if new_node.master_candidate:
4763 self.LogInfo("Node will be a master candidate")
4765 if self.op.ndparams:
4766 new_node.ndparams = self.op.ndparams
4768 new_node.ndparams = {}
4770 # check connectivity
4771 result = self.rpc.call_version([node])[node]
4772 result.Raise("Can't get version information from node %s" % node)
4773 if constants.PROTOCOL_VERSION == result.payload:
4774 logging.info("Communication to node %s fine, sw version %s match",
4775 node, result.payload)
4777 raise errors.OpExecError("Version mismatch master version %s,"
4778 " node version %s" %
4779 (constants.PROTOCOL_VERSION, result.payload))
4781 # Add node to our /etc/hosts, and add key to known_hosts
4782 if self.cfg.GetClusterInfo().modify_etc_hosts:
4783 master_node = self.cfg.GetMasterNode()
4784 result = self.rpc.call_etc_hosts_modify(master_node,
4785 constants.ETC_HOSTS_ADD,
4788 result.Raise("Can't update hosts file with new host data")
4790 if new_node.secondary_ip != new_node.primary_ip:
4791 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4794 node_verify_list = [self.cfg.GetMasterNode()]
4795 node_verify_param = {
4796 constants.NV_NODELIST: [node],
4797 # TODO: do a node-net-test as well?
4800 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4801 self.cfg.GetClusterName())
4802 for verifier in node_verify_list:
4803 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4804 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4806 for failed in nl_payload:
4807 feedback_fn("ssh/hostname verification failed"
4808 " (checking from %s): %s" %
4809 (verifier, nl_payload[failed]))
4810 raise errors.OpExecError("ssh/hostname verification failed")
4813 _RedistributeAncillaryFiles(self)
4814 self.context.ReaddNode(new_node)
4815 # make sure we redistribute the config
4816 self.cfg.Update(new_node, feedback_fn)
4817 # and make sure the new node will not have old files around
4818 if not new_node.master_candidate:
4819 result = self.rpc.call_node_demote_from_mc(new_node.name)
4820 msg = result.fail_msg
4822 self.LogWarning("Node failed to demote itself from master"
4823 " candidate status: %s" % msg)
4825 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4826 additional_vm=self.op.vm_capable)
4827 self.context.AddNode(new_node, self.proc.GetECId())
4830 class LUNodeSetParams(LogicalUnit):
4831 """Modifies the parameters of a node.
4833 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4834 to the node role (as _ROLE_*)
4835 @cvar _R2F: a dictionary from node role to tuples of flags
4836 @cvar _FLAGS: a list of attribute names corresponding to the flags
4839 HPATH = "node-modify"
4840 HTYPE = constants.HTYPE_NODE
4842 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
_F2R = {
4844 (True, False, False): _ROLE_CANDIDATE,
4845 (False, True, False): _ROLE_DRAINED,
4846 (False, False, True): _ROLE_OFFLINE,
4847 (False, False, False): _ROLE_REGULAR,
}
4849 _R2F = dict((v, k) for k, v in _F2R.items())
4850 _FLAGS = ["master_candidate", "drained", "offline"]
4852 def CheckArguments(self):
4853 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4854 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4855 self.op.master_capable, self.op.vm_capable,
4856 self.op.secondary_ip, self.op.ndparams]
4857 if all_mods.count(None) == len(all_mods):
4858 raise errors.OpPrereqError("Please pass at least one modification",
4860 if all_mods.count(True) > 1:
4861 raise errors.OpPrereqError("Can't set the node into more than one"
4862 " state at the same time",
4865 # Boolean value that tells us whether we might be demoting from MC
4866 self.might_demote = (self.op.master_candidate == False or
4867 self.op.offline == True or
4868 self.op.drained == True or
4869 self.op.master_capable == False)
4871 if self.op.secondary_ip:
4872 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4873 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4874 " address" % self.op.secondary_ip,
4877 self.lock_all = self.op.auto_promote and self.might_demote
4878 self.lock_instances = self.op.secondary_ip is not None
4880 def ExpandNames(self):
if self.lock_all:
4882 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
else:
4884 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4886 if self.lock_instances:
4887 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4889 def DeclareLocks(self, level):
4890 # If we have locked all instances, before waiting to lock nodes, release
4891 # all the ones living on nodes unrelated to the current operation.
4892 if level == locking.LEVEL_NODE and self.lock_instances:
4893 self.affected_instances = []
4894 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4897 # Build list of instances to release
4898 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4899 instance = self.context.cfg.GetInstanceInfo(instance_name)
4900 if (instance.disk_template in constants.DTS_INT_MIRROR and
4901 self.op.node_name in instance.all_nodes):
4902 instances_keep.append(instance_name)
4903 self.affected_instances.append(instance)
4905 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4907 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4908 set(instances_keep))
4910 def BuildHooksEnv(self):
4913 This runs on the master node.
4917 "OP_TARGET": self.op.node_name,
4918 "MASTER_CANDIDATE": str(self.op.master_candidate),
4919 "OFFLINE": str(self.op.offline),
4920 "DRAINED": str(self.op.drained),
4921 "MASTER_CAPABLE": str(self.op.master_capable),
4922 "VM_CAPABLE": str(self.op.vm_capable),
4925 def BuildHooksNodes(self):
4926 """Build hooks nodes.
4929 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4932 def CheckPrereq(self):
4933 """Check prerequisites.
4935 This only checks the instance list against the existing names.
4938 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4940 if (self.op.master_candidate is not None or
4941 self.op.drained is not None or
4942 self.op.offline is not None):
4943 # we can't change the master's node flags
4944 if self.op.node_name == self.cfg.GetMasterNode():
4945 raise errors.OpPrereqError("The master role can be changed"
4946 " only via master-failover",
4949 if self.op.master_candidate and not node.master_capable:
4950 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4951 " it a master candidate" % node.name,
4954 if self.op.vm_capable == False:
4955 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4957 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4958 " the vm_capable flag" % node.name,
4961 if node.master_candidate and self.might_demote and not self.lock_all:
4962 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4963 # check if after removing the current node, we're missing master candidates
4965 (mc_remaining, mc_should, _) = \
4966 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4967 if mc_remaining < mc_should:
4968 raise errors.OpPrereqError("Not enough master candidates, please"
4969 " pass auto promote option to allow"
4970 " promotion", errors.ECODE_STATE)
4972 self.old_flags = old_flags = (node.master_candidate,
4973 node.drained, node.offline)
4974 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4975 self.old_role = old_role = self._F2R[old_flags]
4977 # Check for ineffective changes
4978 for attr in self._FLAGS:
4979 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4980 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4981 setattr(self.op, attr, None)
4983 # Past this point, any flag change to False means a transition
4984 # away from the respective state, as only real changes are kept
4986 # TODO: We might query the real power state if it supports OOB
4987 if _SupportsOob(self.cfg, node):
4988 if self.op.offline is False and not (node.powered or
4989 self.op.powered == True):
4990 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4991 " offline status can be reset") %
4993 elif self.op.powered is not None:
4994 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4995 " as it does not support out-of-band"
4996 " handling") % self.op.node_name)
4998 # If we're being deofflined/drained, we'll MC ourself if needed
4999 if (self.op.drained == False or self.op.offline == False or
5000 (self.op.master_capable and not node.master_capable)):
5001 if _DecideSelfPromotion(self):
5002 self.op.master_candidate = True
5003 self.LogInfo("Auto-promoting node to master candidate")
5005 # If we're no longer master capable, we'll demote ourselves from MC
5006 if self.op.master_capable == False and node.master_candidate:
5007 self.LogInfo("Demoting from master candidate")
5008 self.op.master_candidate = False
5011 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5012 if self.op.master_candidate:
5013 new_role = self._ROLE_CANDIDATE
5014 elif self.op.drained:
5015 new_role = self._ROLE_DRAINED
5016 elif self.op.offline:
5017 new_role = self._ROLE_OFFLINE
5018 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5019 # False is still in new flags, which means we're un-setting (the
5021 new_role = self._ROLE_REGULAR
5022 else: # no new flags, nothing, keep old role
new_role = old_role
5025 self.new_role = new_role
5027 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5028 # Trying to transition out of offline status
5029 result = self.rpc.call_version([node.name])[node.name]
5031 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5032 " to report its version: %s" %
5033 (node.name, result.fail_msg),
5036 self.LogWarning("Transitioning node from offline to online state"
5037 " without using re-add. Please make sure the node"
5040 if self.op.secondary_ip:
5041 # Ok even without locking, because this can't be changed by any LU
5042 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5043 master_singlehomed = master.secondary_ip == master.primary_ip
5044 if master_singlehomed and self.op.secondary_ip:
5045 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5046 " homed cluster", errors.ECODE_INVAL)
5049 if self.affected_instances:
5050 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5051 " node has instances (%s) configured"
5052 " to use it" % self.affected_instances)
5054 # On online nodes, check that no instances are running, and that
5055 # the node has the new ip and we can reach it.
5056 for instance in self.affected_instances:
5057 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5059 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5060 if master.name != node.name:
5061 # check reachability from master secondary ip to new secondary ip
5062 if not netutils.TcpPing(self.op.secondary_ip,
5063 constants.DEFAULT_NODED_PORT,
5064 source=master.secondary_ip):
5065 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5066 " based ping to node daemon port",
5067 errors.ECODE_ENVIRON)
5069 if self.op.ndparams:
5070 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5071 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5072 self.new_ndparams = new_ndparams
5074 def Exec(self, feedback_fn):
node = self.node
5079 old_role = self.old_role
5080 new_role = self.new_role
result = []
5084 if self.op.ndparams:
5085 node.ndparams = self.new_ndparams
5087 if self.op.powered is not None:
5088 node.powered = self.op.powered
5090 for attr in ["master_capable", "vm_capable"]:
5091 val = getattr(self.op, attr)
5093 setattr(node, attr, val)
5094 result.append((attr, str(val)))
5096 if new_role != old_role:
5097 # Tell the node to demote itself, if no longer MC and not offline
5098 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5099 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5101 self.LogWarning("Node failed to demote itself: %s", msg)
5103 new_flags = self._R2F[new_role]
5104 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5106 result.append((desc, str(nf)))
5107 (node.master_candidate, node.drained, node.offline) = new_flags
5109 # we locked all nodes, we adjust the CP before updating this node
5111 _AdjustCandidatePool(self, [node.name])
5113 if self.op.secondary_ip:
5114 node.secondary_ip = self.op.secondary_ip
5115 result.append(("secondary_ip", self.op.secondary_ip))
5117 # this will trigger configuration file update, if needed
5118 self.cfg.Update(node, feedback_fn)
5120 # this will trigger job queue propagation or cleanup if the mc
5122 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5123 self.context.ReaddNode(node)
return result
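# Note added for clarity (not part of the original file): the three
# exclusive flags are reduced to a single role via _F2R and expanded back
# via _R2F, so a transition such as "drained -> master candidate" is applied
# atomically as (master_candidate, drained, offline) = (True, False, False),
# followed by a candidate pool adjustment when all node locks are held.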
5128 class LUNodePowercycle(NoHooksLU):
5129 """Powercycles a node.
5134 def CheckArguments(self):
5135 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5136 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5137 raise errors.OpPrereqError("The node is the master and the force"
5138 " parameter was not set",
5141 def ExpandNames(self):
5142 """Locking for PowercycleNode.
5144 This is a last-resort option and shouldn't block on other
5145 jobs. Therefore, we grab no locks.
5148 self.needed_locks = {}
5150 def Exec(self, feedback_fn):
5154 result = self.rpc.call_node_powercycle(self.op.node_name,
5155 self.cfg.GetHypervisorType())
5156 result.Raise("Failed to schedule the reboot")
5157 return result.payload
5160 class LUClusterQuery(NoHooksLU):
5161 """Query cluster configuration.
5166 def ExpandNames(self):
5167 self.needed_locks = {}
5169 def Exec(self, feedback_fn):
5170 """Return cluster config.
5173 cluster = self.cfg.GetClusterInfo()
os_hvp = {}
5176 # Filter just for enabled hypervisors
5177 for os_name, hv_dict in cluster.os_hvp.items():
5178 os_hvp[os_name] = {}
5179 for hv_name, hv_params in hv_dict.items():
5180 if hv_name in cluster.enabled_hypervisors:
5181 os_hvp[os_name][hv_name] = hv_params
5183 # Convert ip_family to ip_version
5184 primary_ip_version = constants.IP4_VERSION
5185 if cluster.primary_ip_family == netutils.IP6Address.family:
5186 primary_ip_version = constants.IP6_VERSION
5189 "software_version": constants.RELEASE_VERSION,
5190 "protocol_version": constants.PROTOCOL_VERSION,
5191 "config_version": constants.CONFIG_VERSION,
5192 "os_api_version": max(constants.OS_API_VERSIONS),
5193 "export_version": constants.EXPORT_VERSION,
5194 "architecture": (platform.architecture()[0], platform.machine()),
5195 "name": cluster.cluster_name,
5196 "master": cluster.master_node,
5197 "default_hypervisor": cluster.enabled_hypervisors[0],
5198 "enabled_hypervisors": cluster.enabled_hypervisors,
5199 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5200 for hypervisor_name in cluster.enabled_hypervisors]),
5202 "beparams": cluster.beparams,
5203 "osparams": cluster.osparams,
5204 "nicparams": cluster.nicparams,
5205 "ndparams": cluster.ndparams,
5206 "candidate_pool_size": cluster.candidate_pool_size,
5207 "master_netdev": cluster.master_netdev,
5208 "volume_group_name": cluster.volume_group_name,
5209 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5210 "file_storage_dir": cluster.file_storage_dir,
5211 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5212 "maintain_node_health": cluster.maintain_node_health,
5213 "ctime": cluster.ctime,
5214 "mtime": cluster.mtime,
5215 "uuid": cluster.uuid,
5216 "tags": list(cluster.GetTags()),
5217 "uid_pool": cluster.uid_pool,
5218 "default_iallocator": cluster.default_iallocator,
5219 "reserved_lvs": cluster.reserved_lvs,
5220 "primary_ip_version": primary_ip_version,
5221 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5222 "hidden_os": cluster.hidden_os,
5223 "blacklisted_os": cluster.blacklisted_os,
5229 class LUClusterConfigQuery(NoHooksLU):
5230 """Return configuration values.
5234 _FIELDS_DYNAMIC = utils.FieldSet()
5235 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5236 "watcher_pause", "volume_group_name")
5238 def CheckArguments(self):
5239 _CheckOutputFields(static=self._FIELDS_STATIC,
5240 dynamic=self._FIELDS_DYNAMIC,
5241 selected=self.op.output_fields)
5243 def ExpandNames(self):
5244 self.needed_locks = {}
5246 def Exec(self, feedback_fn):
5247 """Dump a representation of the cluster config to the standard output.
values = []
5251 for field in self.op.output_fields:
5252 if field == "cluster_name":
5253 entry = self.cfg.GetClusterName()
5254 elif field == "master_node":
5255 entry = self.cfg.GetMasterNode()
5256 elif field == "drain_flag":
5257 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5258 elif field == "watcher_pause":
5259 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5260 elif field == "volume_group_name":
5261 entry = self.cfg.GetVGName()
5263 raise errors.ParameterError(field)
5264 values.append(entry)
return values
5268 class LUInstanceActivateDisks(NoHooksLU):
5269 """Bring up an instance's disks.
5274 def ExpandNames(self):
5275 self._ExpandAndLockInstance()
5276 self.needed_locks[locking.LEVEL_NODE] = []
5277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5279 def DeclareLocks(self, level):
5280 if level == locking.LEVEL_NODE:
5281 self._LockInstancesNodes()
5283 def CheckPrereq(self):
5284 """Check prerequisites.
5286 This checks that the instance is in the cluster.
5289 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5290 assert self.instance is not None, \
5291 "Cannot retrieve locked instance %s" % self.op.instance_name
5292 _CheckNodeOnline(self, self.instance.primary_node)
5294 def Exec(self, feedback_fn):
5295 """Activate the disks.
5298 disks_ok, disks_info = \
5299 _AssembleInstanceDisks(self, self.instance,
5300 ignore_size=self.op.ignore_size)
if not disks_ok:
5302 raise errors.OpExecError("Cannot activate block devices")
return disks_info
5307 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5309 """Prepare the block devices for an instance.
5311 This sets up the block devices on all nodes.
5313 @type lu: L{LogicalUnit}
5314 @param lu: the logical unit on whose behalf we execute
5315 @type instance: L{objects.Instance}
5316 @param instance: the instance for whose disks we assemble
5317 @type disks: list of L{objects.Disk} or None
5318 @param disks: which disks to assemble (or all, if None)
5319 @type ignore_secondaries: boolean
5320 @param ignore_secondaries: if true, errors on secondary nodes
5321 won't result in an error return from the function
5322 @type ignore_size: boolean
5323 @param ignore_size: if true, the current known size of the disk
5324 will not be used during the disk activation, useful for cases
5325 when the size is wrong
5326 @return: False if the operation failed, otherwise a list of
5327 (host, instance_visible_name, node_visible_name)
5328 with the mapping from node devices to instance devices
device_info = []
disks_ok = True
5333 iname = instance.name
5334 disks = _ExpandCheckDisks(instance, disks)
5336 # With the two passes mechanism we try to reduce the window of
5337 # opportunity for the race condition of switching DRBD to primary
5338 # before handshaking occurred, but we do not eliminate it
5340 # The proper fix would be to wait (with some limits) until the
5341 # connection has been made and drbd transitions from WFConnection
5342 # into any other network-connected state (Connected, SyncTarget, SyncSource)
5345 # 1st pass, assemble on all nodes in secondary mode
5346 for idx, inst_disk in enumerate(disks):
5347 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5349 node_disk = node_disk.Copy()
5350 node_disk.UnsetSize()
5351 lu.cfg.SetDiskID(node_disk, node)
5352 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5353 msg = result.fail_msg
if msg:
5355 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5356 " (is_primary=False, pass=1): %s",
5357 inst_disk.iv_name, node, msg)
5358 if not ignore_secondaries:
disks_ok = False
5361 # FIXME: race condition on drbd migration to primary
5363 # 2nd pass, do only the primary node
5364 for idx, inst_disk in enumerate(disks):
5367 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5368 if node != instance.primary_node:
continue
5371 node_disk = node_disk.Copy()
5372 node_disk.UnsetSize()
5373 lu.cfg.SetDiskID(node_disk, node)
5374 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5375 msg = result.fail_msg
if msg:
5377 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5378 " (is_primary=True, pass=2): %s",
5379 inst_disk.iv_name, node, msg)
disks_ok = False
else:
5382 dev_path = result.payload
5384 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5386 # leave the disks configured for the primary node
5387 # this is a workaround that would be fixed better by
5388 # improving the logical/physical id handling
for disk in disks:
5390 lu.cfg.SetDiskID(disk, instance.primary_node)
5392 return disks_ok, device_info
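# Usage sketch added for illustration (not from the original file): callers
# typically unpack the return value as
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, instance)
#
# where device_info is a list of (node_name, iv_name, device_path) tuples
# for the primary node, usable e.g. to expose the block device paths of the
# instance to whoever starts it.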
5395 def _StartInstanceDisks(lu, instance, force):
5396 """Start the disks of an instance.
5399 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5400 ignore_secondaries=force)
if not disks_ok:
5402 _ShutdownInstanceDisks(lu, instance)
5403 if force is not None and not force:
5404 lu.proc.LogWarning("", hint="If the message above refers to a"
5406 " you can retry the operation using '--force'.")
5407 raise errors.OpExecError("Disk consistency error")
5410 class LUInstanceDeactivateDisks(NoHooksLU):
5411 """Shutdown an instance's disks.
5416 def ExpandNames(self):
5417 self._ExpandAndLockInstance()
5418 self.needed_locks[locking.LEVEL_NODE] = []
5419 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5421 def DeclareLocks(self, level):
5422 if level == locking.LEVEL_NODE:
5423 self._LockInstancesNodes()
5425 def CheckPrereq(self):
5426 """Check prerequisites.
5428 This checks that the instance is in the cluster.
5431 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5432 assert self.instance is not None, \
5433 "Cannot retrieve locked instance %s" % self.op.instance_name
5435 def Exec(self, feedback_fn):
5436 """Deactivate the disks
5439 instance = self.instance
if self.op.force:
5441 _ShutdownInstanceDisks(self, instance)
else:
5443 _SafeShutdownInstanceDisks(self, instance)
5446 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5447 """Shutdown block devices of an instance.
5449 This function checks if an instance is running, before calling
5450 _ShutdownInstanceDisks.
5453 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5454 _ShutdownInstanceDisks(lu, instance, disks=disks)
5457 def _ExpandCheckDisks(instance, disks):
5458 """Return the instance disks selected by the disks list
5460 @type disks: list of L{objects.Disk} or None
5461 @param disks: selected disks
5462 @rtype: list of L{objects.Disk}
5463 @return: selected instance disks to act on
if disks is None:
5467 return instance.disks
else:
5469 if not set(disks).issubset(instance.disks):
5470 raise errors.ProgrammerError("Can only act on disks belonging to the"
" target instance")
return disks
5475 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5476 """Shutdown block devices of an instance.
5478 This does the shutdown on all nodes of the instance.
5480 If the ignore_primary is false, errors on the primary node are
ignored.
all_result = True
5485 disks = _ExpandCheckDisks(instance, disks)
for disk in disks:
5488 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5489 lu.cfg.SetDiskID(top_disk, node)
5490 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5491 msg = result.fail_msg
if msg:
5493 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5494 disk.iv_name, node, msg)
5495 if ((node == instance.primary_node and not ignore_primary) or
5496 (node != instance.primary_node and not result.offline)):
all_result = False
return all_result
5501 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5502 """Checks if a node has enough free memory.
5504 This function checks if a given node has the needed amount of free
5505 memory. In case the node has less memory or we cannot get the
5506 information from the node, this function raises an OpPrereqError
exception.
5509 @type lu: C{LogicalUnit}
5510 @param lu: a logical unit from which we get configuration data
5512 @param node: the node to check
5513 @type reason: C{str}
5514 @param reason: string to use in the error message
5515 @type requested: C{int}
5516 @param requested: the amount of memory in MiB to check for
5517 @type hypervisor_name: C{str}
5518 @param hypervisor_name: the hypervisor to ask for memory stats
5519 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5520 we cannot check the node
5523 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5524 nodeinfo[node].Raise("Can't get data from node %s" % node,
5525 prereq=True, ecode=errors.ECODE_ENVIRON)
5526 free_mem = nodeinfo[node].payload.get('memory_free', None)
5527 if not isinstance(free_mem, int):
5528 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5529 " was '%s'" % (node, free_mem),
5530 errors.ECODE_ENVIRON)
5531 if requested > free_mem:
5532 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5533 " needed %s MiB, available %s MiB" %
5534 (node, reason, requested, free_mem),
5538 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5539 """Checks if nodes have enough free disk space in the all VGs.
5541 This function check if all given nodes have the needed amount of
5542 free disk. In case any node has less disk or we cannot get the
5543 information from the node, this function raise an OpPrereqError
5546 @type lu: C{LogicalUnit}
5547 @param lu: a logical unit from which we get configuration data
5548 @type nodenames: C{list}
5549 @param nodenames: the list of node names to check
5550 @type req_sizes: C{dict}
5551 @param req_sizes: the hash of vg and corresponding amount of disk in
5553 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5554 or we cannot check the node
5557 for vg, req_size in req_sizes.items():
5558 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5561 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5562 """Checks if nodes have enough free disk space in the specified VG.
5564 This function checks if all given nodes have the needed amount of
5565 free disk. In case any node has less disk or we cannot get the
5566 information from the node, this function raises an OpPrereqError
exception.
5569 @type lu: C{LogicalUnit}
5570 @param lu: a logical unit from which we get configuration data
5571 @type nodenames: C{list}
5572 @param nodenames: the list of node names to check
5574 @param vg: the volume group to check
5575 @type requested: C{int}
5576 @param requested: the amount of disk in MiB to check for
5577 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5578 or we cannot check the node
5581 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5582 for node in nodenames:
5583 info = nodeinfo[node]
5584 info.Raise("Cannot get current information from node %s" % node,
5585 prereq=True, ecode=errors.ECODE_ENVIRON)
5586 vg_free = info.payload.get("vg_free", None)
5587 if not isinstance(vg_free, int):
5588 raise errors.OpPrereqError("Can't compute free disk space on node"
5589 " %s for vg %s, result was '%s'" %
5590 (node, vg, vg_free), errors.ECODE_ENVIRON)
5591 if requested > vg_free:
5592 raise errors.OpPrereqError("Not enough disk space on target node %s"
5593 " vg %s: required %d MiB, available %d MiB" %
5594 (node, vg, requested, vg_free),
errors.ECODE_NORES)
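# Illustrative example (hypothetical names, not from the original file): an
# instance-creation style LU would typically combine these helpers as
#
#   _CheckNodeFreeMemory(self, pnode_name, "creating instance %s" % iname,
#                        be_full[constants.BE_MEMORY], hypervisor_name)
#   _CheckNodesFreeDiskPerVG(self, nodenames, {vg_name: 2 * 1024})
#
# where pnode_name, iname, be_full, hypervisor_name, nodenames and vg_name
# are hypothetical variables and the dict maps each volume group to the
# amount of disk required on it, in MiB.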
5598 class LUInstanceStartup(LogicalUnit):
5599 """Starts an instance.
5602 HPATH = "instance-start"
5603 HTYPE = constants.HTYPE_INSTANCE
5606 def CheckArguments(self):
5608 if self.op.beparams:
5609 # fill the beparams dict
5610 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5612 def ExpandNames(self):
5613 self._ExpandAndLockInstance()
5615 def BuildHooksEnv(self):
5618 This runs on master, primary and secondary nodes of the instance.
5622 "FORCE": self.op.force,
5625 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5629 def BuildHooksNodes(self):
5630 """Build hooks nodes.
5633 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5636 def CheckPrereq(self):
5637 """Check prerequisites.
5639 This checks that the instance is in the cluster.
5642 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5643 assert self.instance is not None, \
5644 "Cannot retrieve locked instance %s" % self.op.instance_name
5647 if self.op.hvparams:
5648 # check hypervisor parameter syntax (locally)
5649 cluster = self.cfg.GetClusterInfo()
5650 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5651 filled_hvp = cluster.FillHV(instance)
5652 filled_hvp.update(self.op.hvparams)
5653 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5654 hv_type.CheckParameterSyntax(filled_hvp)
5655 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
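# Override-order sketch for the hvparams check above (keys and values are
# hypothetical): the cluster defaults are filled in first via FillHV, then the
# per-opcode overrides win, and only the merged dict is syntax-checked.
#
#   >>> filled_hvp = {"root_path": "/dev/vda1", "kernel_args": "ro"}
#   >>> filled_hvp.update({"root_path": "/dev/vda2"})  # self.op.hvparams
#   >>> filled_hvp["root_path"]
#   '/dev/vda2'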
5657 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5659 if self.primary_offline and self.op.ignore_offline_nodes:
5660 self.proc.LogWarning("Ignoring offline primary node")
5662 if self.op.hvparams or self.op.beparams:
5663 self.proc.LogWarning("Overridden parameters are ignored")
5665 _CheckNodeOnline(self, instance.primary_node)
5667 bep = self.cfg.GetClusterInfo().FillBE(instance)
5669 # check bridges existence
5670 _CheckInstanceBridgesExist(self, instance)
5672 remote_info = self.rpc.call_instance_info(instance.primary_node,
5674 instance.hypervisor)
5675 remote_info.Raise("Error checking node %s" % instance.primary_node,
5676 prereq=True, ecode=errors.ECODE_ENVIRON)
5677 if not remote_info.payload: # not running already
5678 _CheckNodeFreeMemory(self, instance.primary_node,
5679 "starting instance %s" % instance.name,
5680 bep[constants.BE_MEMORY], instance.hypervisor)
5682 def Exec(self, feedback_fn):
5683 """Start the instance.
5686 instance = self.instance
5687 force = self.op.force
5689 if not self.op.no_remember:
5690 self.cfg.MarkInstanceUp(instance.name)
5692 if self.primary_offline:
5693 assert self.op.ignore_offline_nodes
5694 self.proc.LogInfo("Primary node offline, marked instance as started")
5696 node_current = instance.primary_node
5698 _StartInstanceDisks(self, instance, force)
5700 result = self.rpc.call_instance_start(node_current, instance,
5701 self.op.hvparams, self.op.beparams)
5702 msg = result.fail_msg
5704 _ShutdownInstanceDisks(self, instance)
5705 raise errors.OpExecError("Could not start instance: %s" % msg)
5708 class LUInstanceReboot(LogicalUnit):
5709 """Reboot an instance.
5712 HPATH = "instance-reboot"
5713 HTYPE = constants.HTYPE_INSTANCE
5716 def ExpandNames(self):
5717 self._ExpandAndLockInstance()
5719 def BuildHooksEnv(self):
5722 This runs on master, primary and secondary nodes of the instance.
5726 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5727 "REBOOT_TYPE": self.op.reboot_type,
5728 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5731 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5735 def BuildHooksNodes(self):
5736 """Build hooks nodes.
5739 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5742 def CheckPrereq(self):
5743 """Check prerequisites.
5745 This checks that the instance is in the cluster.
5748 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5749 assert self.instance is not None, \
5750 "Cannot retrieve locked instance %s" % self.op.instance_name
5752 _CheckNodeOnline(self, instance.primary_node)
5754 # check bridges existence
5755 _CheckInstanceBridgesExist(self, instance)
5757 def Exec(self, feedback_fn):
5758 """Reboot the instance.
5761 instance = self.instance
5762 ignore_secondaries = self.op.ignore_secondaries
5763 reboot_type = self.op.reboot_type
5765 remote_info = self.rpc.call_instance_info(instance.primary_node,
5767 instance.hypervisor)
5768 remote_info.Raise("Error checking node %s" % instance.primary_node)
5769 instance_running = bool(remote_info.payload)
5771 node_current = instance.primary_node
5773 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5774 constants.INSTANCE_REBOOT_HARD]:
5775 for disk in instance.disks:
5776 self.cfg.SetDiskID(disk, node_current)
5777 result = self.rpc.call_instance_reboot(node_current, instance,
5779 self.op.shutdown_timeout)
5780 result.Raise("Could not reboot instance")
5782 if instance_running:
5783 result = self.rpc.call_instance_shutdown(node_current, instance,
5784 self.op.shutdown_timeout)
5785 result.Raise("Could not shutdown instance for full reboot")
5786 _ShutdownInstanceDisks(self, instance)
5788 self.LogInfo("Instance %s was already stopped, starting now",
5790 _StartInstanceDisks(self, instance, ignore_secondaries)
5791 result = self.rpc.call_instance_start(node_current, instance, None, None)
5792 msg = result.fail_msg
5794 _ShutdownInstanceDisks(self, instance)
5795 raise errors.OpExecError("Could not start instance for"
5796 " full reboot: %s" % msg)
5798 self.cfg.MarkInstanceUp(instance.name)
5801 class LUInstanceShutdown(LogicalUnit):
5802 """Shutdown an instance.
5805 HPATH = "instance-stop"
5806 HTYPE = constants.HTYPE_INSTANCE
5809 def ExpandNames(self):
5810 self._ExpandAndLockInstance()
5812 def BuildHooksEnv(self):
5815 This runs on master, primary and secondary nodes of the instance.
5818 env = _BuildInstanceHookEnvByObject(self, self.instance)
5819 env["TIMEOUT"] = self.op.timeout
5822 def BuildHooksNodes(self):
5823 """Build hooks nodes.
5826 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5829 def CheckPrereq(self):
5830 """Check prerequisites.
5832 This checks that the instance is in the cluster.
5835 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5836 assert self.instance is not None, \
5837 "Cannot retrieve locked instance %s" % self.op.instance_name
5839 self.primary_offline = \
5840 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5842 if self.primary_offline and self.op.ignore_offline_nodes:
5843 self.proc.LogWarning("Ignoring offline primary node")
5845 _CheckNodeOnline(self, self.instance.primary_node)
5847 def Exec(self, feedback_fn):
5848 """Shutdown the instance.
5851 instance = self.instance
5852 node_current = instance.primary_node
5853 timeout = self.op.timeout
5855 if not self.op.no_remember:
5856 self.cfg.MarkInstanceDown(instance.name)
5858 if self.primary_offline:
5859 assert self.op.ignore_offline_nodes
5860 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5862 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5863 msg = result.fail_msg
5865 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5867 _ShutdownInstanceDisks(self, instance)
5870 class LUInstanceReinstall(LogicalUnit):
5871 """Reinstall an instance.
5874 HPATH = "instance-reinstall"
5875 HTYPE = constants.HTYPE_INSTANCE
5878 def ExpandNames(self):
5879 self._ExpandAndLockInstance()
5881 def BuildHooksEnv(self):
5884 This runs on master, primary and secondary nodes of the instance.
5887 return _BuildInstanceHookEnvByObject(self, self.instance)
5889 def BuildHooksNodes(self):
5890 """Build hooks nodes.
5893 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5896 def CheckPrereq(self):
5897 """Check prerequisites.
5899 This checks that the instance is in the cluster and is not running.
5902 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5903 assert instance is not None, \
5904 "Cannot retrieve locked instance %s" % self.op.instance_name
5905 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5906 " offline, cannot reinstall")
5907 for node in instance.secondary_nodes:
5908 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5909 " cannot reinstall")
5911 if instance.disk_template == constants.DT_DISKLESS:
5912 raise errors.OpPrereqError("Instance '%s' has no disks" %
5913 self.op.instance_name,
5915 _CheckInstanceDown(self, instance, "cannot reinstall")
5917 if self.op.os_type is not None:
5919 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5920 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5921 instance_os = self.op.os_type
5923 instance_os = instance.os
5925 nodelist = list(instance.all_nodes)
5927 if self.op.osparams:
5928 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5929 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5930 self.os_inst = i_osdict # the new dict (without defaults)
5934 self.instance = instance
5936 def Exec(self, feedback_fn):
5937 """Reinstall the instance.
5940 inst = self.instance
5942 if self.op.os_type is not None:
5943 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5944 inst.os = self.op.os_type
5945 # Write to configuration
5946 self.cfg.Update(inst, feedback_fn)
5948 _StartInstanceDisks(self, inst, None)
5950 feedback_fn("Running the instance OS create scripts...")
5951 # FIXME: pass debug option from opcode to backend
5952 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5953 self.op.debug_level,
5954 osparams=self.os_inst)
5955 result.Raise("Could not install OS for instance %s on node %s" %
5956 (inst.name, inst.primary_node))
5958 _ShutdownInstanceDisks(self, inst)
5961 class LUInstanceRecreateDisks(LogicalUnit):
5962 """Recreate an instance's missing disks.
5965 HPATH = "instance-recreate-disks"
5966 HTYPE = constants.HTYPE_INSTANCE
5969 def CheckArguments(self):
5970 # normalise the disk list
5971 self.op.disks = sorted(frozenset(self.op.disks))
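# Normalisation sketch (input is hypothetical): duplicate disk indices are
# dropped and the resulting list is made deterministic.
#
#   >>> sorted(frozenset([2, 0, 2]))
#   [0, 2]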
5973 def ExpandNames(self):
5974 self._ExpandAndLockInstance()
5975 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5977 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5978 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5980 self.needed_locks[locking.LEVEL_NODE] = []
5982 def DeclareLocks(self, level):
5983 if level == locking.LEVEL_NODE:
5984 # if we replace the nodes, we only need to lock the old primary,
5985 # otherwise we need to lock all nodes for disk re-creation
5986 primary_only = bool(self.op.nodes)
5987 self._LockInstancesNodes(primary_only=primary_only)
5989 def BuildHooksEnv(self):
5992 This runs on master, primary and secondary nodes of the instance.
5995 return _BuildInstanceHookEnvByObject(self, self.instance)
5997 def BuildHooksNodes(self):
5998 """Build hooks nodes.
6001 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6004 def CheckPrereq(self):
6005 """Check prerequisites.
6007 This checks that the instance is in the cluster and is not running.
6010 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6011 assert instance is not None, \
6012 "Cannot retrieve locked instance %s" % self.op.instance_name
6014 if len(self.op.nodes) != len(instance.all_nodes):
6015 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6016 " %d replacement nodes were specified" %
6017 (instance.name, len(instance.all_nodes),
6018 len(self.op.nodes)),
6020 assert instance.disk_template != constants.DT_DRBD8 or \
6021 len(self.op.nodes) == 2
6022 assert instance.disk_template != constants.DT_PLAIN or \
6023 len(self.op.nodes) == 1
6024 primary_node = self.op.nodes[0]
6026 primary_node = instance.primary_node
6027 _CheckNodeOnline(self, primary_node)
6029 if instance.disk_template == constants.DT_DISKLESS:
6030 raise errors.OpPrereqError("Instance '%s' has no disks" %
6031 self.op.instance_name, errors.ECODE_INVAL)
6032 # if we replace nodes *and* the old primary is offline, we don't check that the instance is down
6034 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6035 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6036 if not (self.op.nodes and old_pnode.offline):
6037 _CheckInstanceDown(self, instance, "cannot recreate disks")
6039 if not self.op.disks:
6040 self.op.disks = range(len(instance.disks))
6042 for idx in self.op.disks:
6043 if idx >= len(instance.disks):
6044 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6046 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6047 raise errors.OpPrereqError("Can't recreate disks partially and"
6048 " change the nodes at the same time",
6050 self.instance = instance
6052 def Exec(self, feedback_fn):
6053 """Recreate the disks.
6056 # change primary node, if needed
6058 self.instance.primary_node = self.op.nodes[0]
6059 self.LogWarning("Changing the instance's nodes, you will have to"
6060 " remove any disks left on the older nodes manually")
6063 for idx, disk in enumerate(self.instance.disks):
6064 if idx not in self.op.disks: # disk idx has not been passed in
6067 # update secondaries for disks, if needed
6069 if disk.dev_type == constants.LD_DRBD8:
6070 # need to update the nodes
6071 assert len(self.op.nodes) == 2
6072 logical_id = list(disk.logical_id)
6073 logical_id[0] = self.op.nodes[0]
6074 logical_id[1] = self.op.nodes[1]
6075 disk.logical_id = tuple(logical_id)
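# logical_id layout for LD_DRBD8 disks, as assembled by _GenerateDRBD8Branch
# further below: (node_a, node_b, port, minor_a, minor_b, secret).  Only the
# two leading node slots are rewritten here; port, minors and the shared
# secret are kept as they were (example values are hypothetical).
#
#   >>> disk.logical_id
#   ('node1.example.com', 'node2.example.com', 11000, 0, 1, '0123abcd')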
6078 self.cfg.Update(self.instance, feedback_fn)
6080 _CreateDisks(self, self.instance, to_skip=to_skip)
6083 class LUInstanceRename(LogicalUnit):
6084 """Rename an instance.
6087 HPATH = "instance-rename"
6088 HTYPE = constants.HTYPE_INSTANCE
6090 def CheckArguments(self):
6094 if self.op.ip_check and not self.op.name_check:
6095 # TODO: make the ip check more flexible and not depend on the name check
6096 raise errors.OpPrereqError("IP address check requires a name check",
6099 def BuildHooksEnv(self):
6102 This runs on master, primary and secondary nodes of the instance.
6105 env = _BuildInstanceHookEnvByObject(self, self.instance)
6106 env["INSTANCE_NEW_NAME"] = self.op.new_name
6109 def BuildHooksNodes(self):
6110 """Build hooks nodes.
6113 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6116 def CheckPrereq(self):
6117 """Check prerequisites.
6119 This checks that the instance is in the cluster and is not running.
6122 self.op.instance_name = _ExpandInstanceName(self.cfg,
6123 self.op.instance_name)
6124 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6125 assert instance is not None
6126 _CheckNodeOnline(self, instance.primary_node)
6127 _CheckInstanceDown(self, instance, "cannot rename")
6128 self.instance = instance
6130 new_name = self.op.new_name
6131 if self.op.name_check:
6132 hostname = netutils.GetHostname(name=new_name)
6133 if hostname != new_name:
6134 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6136 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6137 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6138 " same as given hostname '%s'") %
6139 (hostname.name, self.op.new_name),
6141 new_name = self.op.new_name = hostname.name
6142 if (self.op.ip_check and
6143 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6144 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6145 (hostname.ip, new_name),
6146 errors.ECODE_NOTUNIQUE)
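# IP-check sketch (address is hypothetical): the check simply tries a TCP
# connection to the noded port on the candidate address; a reply means the IP
# is already in use and the rename is refused.
#
#   >>> netutils.TcpPing("192.0.2.10", constants.DEFAULT_NODED_PORT)
#   False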
6148 instance_list = self.cfg.GetInstanceList()
6149 if new_name in instance_list and new_name != instance.name:
6150 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6151 new_name, errors.ECODE_EXISTS)
6153 def Exec(self, feedback_fn):
6154 """Rename the instance.
6157 inst = self.instance
6158 old_name = inst.name
6160 rename_file_storage = False
6161 if (inst.disk_template in constants.DTS_FILEBASED and
6162 self.op.new_name != inst.name):
6163 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6164 rename_file_storage = True
6166 self.cfg.RenameInstance(inst.name, self.op.new_name)
6167 # Change the instance lock. This is definitely safe while we hold the BGL.
6168 # Otherwise the new lock would have to be added in acquired mode.
6170 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6171 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6173 # re-read the instance from the configuration after rename
6174 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6176 if rename_file_storage:
6177 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6178 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6179 old_file_storage_dir,
6180 new_file_storage_dir)
6181 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6182 " (but the instance has been renamed in Ganeti)" %
6183 (inst.primary_node, old_file_storage_dir,
6184 new_file_storage_dir))
6186 _StartInstanceDisks(self, inst, None)
6188 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6189 old_name, self.op.debug_level)
6190 msg = result.fail_msg
6192 msg = ("Could not run OS rename script for instance %s on node %s"
6193 " (but the instance has been renamed in Ganeti): %s" %
6194 (inst.name, inst.primary_node, msg))
6195 self.proc.LogWarning(msg)
6197 _ShutdownInstanceDisks(self, inst)
6202 class LUInstanceRemove(LogicalUnit):
6203 """Remove an instance.
6206 HPATH = "instance-remove"
6207 HTYPE = constants.HTYPE_INSTANCE
6210 def ExpandNames(self):
6211 self._ExpandAndLockInstance()
6212 self.needed_locks[locking.LEVEL_NODE] = []
6213 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6215 def DeclareLocks(self, level):
6216 if level == locking.LEVEL_NODE:
6217 self._LockInstancesNodes()
6219 def BuildHooksEnv(self):
6222 This runs on master, primary and secondary nodes of the instance.
6225 env = _BuildInstanceHookEnvByObject(self, self.instance)
6226 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6229 def BuildHooksNodes(self):
6230 """Build hooks nodes.
6233 nl = [self.cfg.GetMasterNode()]
6234 nl_post = list(self.instance.all_nodes) + nl
6235 return (nl, nl_post)
6237 def CheckPrereq(self):
6238 """Check prerequisites.
6240 This checks that the instance is in the cluster.
6243 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6244 assert self.instance is not None, \
6245 "Cannot retrieve locked instance %s" % self.op.instance_name
6247 def Exec(self, feedback_fn):
6248 """Remove the instance.
6251 instance = self.instance
6252 logging.info("Shutting down instance %s on node %s",
6253 instance.name, instance.primary_node)
6255 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6256 self.op.shutdown_timeout)
6257 msg = result.fail_msg
6259 if self.op.ignore_failures:
6260 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6262 raise errors.OpExecError("Could not shutdown instance %s on"
6264 (instance.name, instance.primary_node, msg))
6266 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6269 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6270 """Utility function to remove an instance.
6273 logging.info("Removing block devices for instance %s", instance.name)
6275 if not _RemoveDisks(lu, instance):
6276 if not ignore_failures:
6277 raise errors.OpExecError("Can't remove instance's disks")
6278 feedback_fn("Warning: can't remove instance's disks")
6280 logging.info("Removing instance %s out of cluster config", instance.name)
6282 lu.cfg.RemoveInstance(instance.name)
6284 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6285 "Instance lock removal conflict"
6287 # Remove lock for the instance
6288 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6291 class LUInstanceQuery(NoHooksLU):
6292 """Logical unit for querying instances.
6295 # pylint: disable-msg=W0142
6298 def CheckArguments(self):
6299 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6300 self.op.output_fields, self.op.use_locking)
6302 def ExpandNames(self):
6303 self.iq.ExpandNames(self)
6305 def DeclareLocks(self, level):
6306 self.iq.DeclareLocks(self, level)
6308 def Exec(self, feedback_fn):
6309 return self.iq.OldStyleQuery(self)
6312 class LUInstanceFailover(LogicalUnit):
6313 """Failover an instance.
6316 HPATH = "instance-failover"
6317 HTYPE = constants.HTYPE_INSTANCE
6320 def CheckArguments(self):
6321 """Check the arguments.
6324 self.iallocator = getattr(self.op, "iallocator", None)
6325 self.target_node = getattr(self.op, "target_node", None)
6327 def ExpandNames(self):
6328 self._ExpandAndLockInstance()
6330 if self.op.target_node is not None:
6331 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6333 self.needed_locks[locking.LEVEL_NODE] = []
6334 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6336 ignore_consistency = self.op.ignore_consistency
6337 shutdown_timeout = self.op.shutdown_timeout
6338 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6341 ignore_consistency=ignore_consistency,
6342 shutdown_timeout=shutdown_timeout)
6343 self.tasklets = [self._migrater]
6345 def DeclareLocks(self, level):
6346 if level == locking.LEVEL_NODE:
6347 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6348 if instance.disk_template in constants.DTS_EXT_MIRROR:
6349 if self.op.target_node is None:
6350 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6352 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6353 self.op.target_node]
6354 del self.recalculate_locks[locking.LEVEL_NODE]
6356 self._LockInstancesNodes()
6358 def BuildHooksEnv(self):
6361 This runs on master, primary and secondary nodes of the instance.
6364 instance = self._migrater.instance
6365 source_node = instance.primary_node
6366 target_node = self.op.target_node
6368 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6369 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6370 "OLD_PRIMARY": source_node,
6371 "NEW_PRIMARY": target_node,
6374 if instance.disk_template in constants.DTS_INT_MIRROR:
6375 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6376 env["NEW_SECONDARY"] = source_node
6378 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6380 env.update(_BuildInstanceHookEnvByObject(self, instance))
6384 def BuildHooksNodes(self):
6385 """Build hooks nodes.
6388 instance = self._migrater.instance
6389 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6390 return (nl, nl + [instance.primary_node])
6393 class LUInstanceMigrate(LogicalUnit):
6394 """Migrate an instance.
6396 This is migration without shutting down, compared to the failover,
6397 which is done with shutdown.
6400 HPATH = "instance-migrate"
6401 HTYPE = constants.HTYPE_INSTANCE
6404 def ExpandNames(self):
6405 self._ExpandAndLockInstance()
6407 if self.op.target_node is not None:
6408 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6410 self.needed_locks[locking.LEVEL_NODE] = []
6411 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6413 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6414 cleanup=self.op.cleanup,
6416 fallback=self.op.allow_failover)
6417 self.tasklets = [self._migrater]
6419 def DeclareLocks(self, level):
6420 if level == locking.LEVEL_NODE:
6421 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6422 if instance.disk_template in constants.DTS_EXT_MIRROR:
6423 if self.op.target_node is None:
6424 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6426 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6427 self.op.target_node]
6428 del self.recalculate_locks[locking.LEVEL_NODE]
6430 self._LockInstancesNodes()
6432 def BuildHooksEnv(self):
6435 This runs on master, primary and secondary nodes of the instance.
6438 instance = self._migrater.instance
6439 source_node = instance.primary_node
6440 target_node = self.op.target_node
6441 env = _BuildInstanceHookEnvByObject(self, instance)
6443 "MIGRATE_LIVE": self._migrater.live,
6444 "MIGRATE_CLEANUP": self.op.cleanup,
6445 "OLD_PRIMARY": source_node,
6446 "NEW_PRIMARY": target_node,
6449 if instance.disk_template in constants.DTS_INT_MIRROR:
6450 env["OLD_SECONDARY"] = target_node
6451 env["NEW_SECONDARY"] = source_node
6453 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6457 def BuildHooksNodes(self):
6458 """Build hooks nodes.
6461 instance = self._migrater.instance
6462 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6463 return (nl, nl + [instance.primary_node])
6466 class LUInstanceMove(LogicalUnit):
6467 """Move an instance by data-copying.
6470 HPATH = "instance-move"
6471 HTYPE = constants.HTYPE_INSTANCE
6474 def ExpandNames(self):
6475 self._ExpandAndLockInstance()
6476 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6477 self.op.target_node = target_node
6478 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6479 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6481 def DeclareLocks(self, level):
6482 if level == locking.LEVEL_NODE:
6483 self._LockInstancesNodes(primary_only=True)
6485 def BuildHooksEnv(self):
6488 This runs on master, primary and secondary nodes of the instance.
6492 "TARGET_NODE": self.op.target_node,
6493 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6495 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6498 def BuildHooksNodes(self):
6499 """Build hooks nodes.
6503 self.cfg.GetMasterNode(),
6504 self.instance.primary_node,
6505 self.op.target_node,
6509 def CheckPrereq(self):
6510 """Check prerequisites.
6512 This checks that the instance is in the cluster.
6515 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6516 assert self.instance is not None, \
6517 "Cannot retrieve locked instance %s" % self.op.instance_name
6519 node = self.cfg.GetNodeInfo(self.op.target_node)
6520 assert node is not None, \
6521 "Cannot retrieve locked node %s" % self.op.target_node
6523 self.target_node = target_node = node.name
6525 if target_node == instance.primary_node:
6526 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6527 (instance.name, target_node),
6530 bep = self.cfg.GetClusterInfo().FillBE(instance)
6532 for idx, dsk in enumerate(instance.disks):
6533 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6534 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6535 " cannot copy" % idx, errors.ECODE_STATE)
6537 _CheckNodeOnline(self, target_node)
6538 _CheckNodeNotDrained(self, target_node)
6539 _CheckNodeVmCapable(self, target_node)
6541 if instance.admin_up:
6542 # check memory requirements on the target node
6543 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6544 instance.name, bep[constants.BE_MEMORY],
6545 instance.hypervisor)
6547 self.LogInfo("Not checking memory on the secondary node as"
6548 " instance will not be started")
6550 # check bridge existence
6551 _CheckInstanceBridgesExist(self, instance, node=target_node)
6553 def Exec(self, feedback_fn):
6554 """Move an instance.
6556 The move is done by shutting it down on its present node, copying
6557 the data over (slow) and starting it on the new node.
6560 instance = self.instance
6562 source_node = instance.primary_node
6563 target_node = self.target_node
6565 self.LogInfo("Shutting down instance %s on source node %s",
6566 instance.name, source_node)
6568 result = self.rpc.call_instance_shutdown(source_node, instance,
6569 self.op.shutdown_timeout)
6570 msg = result.fail_msg
6572 if self.op.ignore_consistency:
6573 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6574 " Proceeding anyway. Please make sure node"
6575 " %s is down. Error details: %s",
6576 instance.name, source_node, source_node, msg)
6578 raise errors.OpExecError("Could not shutdown instance %s on"
6580 (instance.name, source_node, msg))
6582 # create the target disks
6584 _CreateDisks(self, instance, target_node=target_node)
6585 except errors.OpExecError:
6586 self.LogWarning("Device creation failed, reverting...")
6588 _RemoveDisks(self, instance, target_node=target_node)
6590 self.cfg.ReleaseDRBDMinors(instance.name)
6593 cluster_name = self.cfg.GetClusterInfo().cluster_name
6596 # activate, get path, copy the data over
6597 for idx, disk in enumerate(instance.disks):
6598 self.LogInfo("Copying data for disk %d", idx)
6599 result = self.rpc.call_blockdev_assemble(target_node, disk,
6600 instance.name, True, idx)
6602 self.LogWarning("Can't assemble newly created disk %d: %s",
6603 idx, result.fail_msg)
6604 errs.append(result.fail_msg)
6606 dev_path = result.payload
6607 result = self.rpc.call_blockdev_export(source_node, disk,
6608 target_node, dev_path,
6611 self.LogWarning("Can't copy data over for disk %d: %s",
6612 idx, result.fail_msg)
6613 errs.append(result.fail_msg)
6617 self.LogWarning("Some disks failed to copy, aborting")
6619 _RemoveDisks(self, instance, target_node=target_node)
6621 self.cfg.ReleaseDRBDMinors(instance.name)
6622 raise errors.OpExecError("Errors during disk copy: %s" %
6625 instance.primary_node = target_node
6626 self.cfg.Update(instance, feedback_fn)
6628 self.LogInfo("Removing the disks on the original node")
6629 _RemoveDisks(self, instance, target_node=source_node)
6631 # Only start the instance if it's marked as up
6632 if instance.admin_up:
6633 self.LogInfo("Starting instance %s on node %s",
6634 instance.name, target_node)
6636 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6637 ignore_secondaries=True)
6639 _ShutdownInstanceDisks(self, instance)
6640 raise errors.OpExecError("Can't activate the instance's disks")
6642 result = self.rpc.call_instance_start(target_node, instance, None, None)
6643 msg = result.fail_msg
6645 _ShutdownInstanceDisks(self, instance)
6646 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6647 (instance.name, target_node, msg))
6650 class LUNodeMigrate(LogicalUnit):
6651 """Migrate all instances from a node.
6654 HPATH = "node-migrate"
6655 HTYPE = constants.HTYPE_NODE
6658 def CheckArguments(self):
6661 def ExpandNames(self):
6662 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6664 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6665 self.needed_locks = {
6666 locking.LEVEL_NODE: [self.op.node_name],
6669 def BuildHooksEnv(self):
6672 This runs on the master, the primary and all the secondaries.
6676 "NODE_NAME": self.op.node_name,
6679 def BuildHooksNodes(self):
6680 """Build hooks nodes.
6683 nl = [self.cfg.GetMasterNode()]
6686 def CheckPrereq(self):
6689 def Exec(self, feedback_fn):
6690 # Prepare jobs for migration instances
6692 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6695 iallocator=self.op.iallocator,
6696 target_node=self.op.target_node)]
6697 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6700 # TODO: Run iallocator in this opcode and pass correct placement options to
6701 # OpInstanceMigrate. Since other jobs can modify the cluster between
6702 # running the iallocator and the actual migration, a good consistency model
6703 # will have to be found.
6705 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6706 frozenset([self.op.node_name]))
6708 return ResultWithJobs(jobs)
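# Jobs shape sketch (instance names are hypothetical): one single-opcode job
# is submitted per primary instance of the evacuated node, e.g.
#
#   [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)]]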
6711 class TLMigrateInstance(Tasklet):
6712 """Tasklet class for instance migration.
6715 @ivar live: whether the migration will be done live or non-live;
6716 this variable is initialized only after CheckPrereq has run
6717 @type cleanup: boolean
6718 @ivar cleanup: Whether we clean up from a failed migration
6719 @type iallocator: string
6720 @ivar iallocator: The iallocator used to determine target_node
6721 @type target_node: string
6722 @ivar target_node: If given, the target_node to reallocate the instance to
6723 @type failover: boolean
6724 @ivar failover: Whether operation results in failover or migration
6725 @type fallback: boolean
6726 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
6728 @type ignore_consistency: boolean
6729 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
6731 @type shutdown_timeout: int
6732 @ivar shutdown_timeout: the timeout to use for the shutdown in case of failover
6735 def __init__(self, lu, instance_name, cleanup=False,
6736 failover=False, fallback=False,
6737 ignore_consistency=False,
6738 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6739 """Initializes this class.
6742 Tasklet.__init__(self, lu)
6745 self.instance_name = instance_name
6746 self.cleanup = cleanup
6747 self.live = False # will be overridden later
6748 self.failover = failover
6749 self.fallback = fallback
6750 self.ignore_consistency = ignore_consistency
6751 self.shutdown_timeout = shutdown_timeout
6753 def CheckPrereq(self):
6754 """Check prerequisites.
6756 This checks that the instance is in the cluster.
6759 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6760 instance = self.cfg.GetInstanceInfo(instance_name)
6761 assert instance is not None
6762 self.instance = instance
6764 if (not self.cleanup and not instance.admin_up and not self.failover and
6766 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6768 self.failover = True
6770 if instance.disk_template not in constants.DTS_MIRRORED:
6775 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6776 " %s" % (instance.disk_template, text),
6779 if instance.disk_template in constants.DTS_EXT_MIRROR:
6780 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6782 if self.lu.op.iallocator:
6783 self._RunAllocator()
6785 # We set self.target_node as it is required by
6787 self.target_node = self.lu.op.target_node
6789 # self.target_node is already populated, either directly or by the iallocator run
6791 target_node = self.target_node
6792 if self.target_node == instance.primary_node:
6793 raise errors.OpPrereqError("Cannot migrate instance %s"
6794 " to its primary (%s)" %
6795 (instance.name, instance.primary_node))
6797 if len(self.lu.tasklets) == 1:
6798 # It is safe to release locks only when we're the only tasklet
6800 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6801 keep=[instance.primary_node, self.target_node])
6804 secondary_nodes = instance.secondary_nodes
6805 if not secondary_nodes:
6806 raise errors.ConfigurationError("No secondary node but using"
6807 " %s disk template" %
6808 instance.disk_template)
6809 target_node = secondary_nodes[0]
6810 if self.lu.op.iallocator or (self.lu.op.target_node and
6811 self.lu.op.target_node != target_node):
6813 text = "failed over"
6816 raise errors.OpPrereqError("Instances with disk template %s cannot"
6817 " be %s to arbitrary nodes"
6818 " (neither an iallocator nor a target"
6819 " node can be passed)" %
6820 (instance.disk_template, text),
6823 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6825 # check memory requirements on the secondary node
6826 if not self.failover or instance.admin_up:
6827 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6828 instance.name, i_be[constants.BE_MEMORY],
6829 instance.hypervisor)
6831 self.lu.LogInfo("Not checking memory on the secondary node as"
6832 " instance will not be started")
6834 # check bridge existence
6835 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6837 if not self.cleanup:
6838 _CheckNodeNotDrained(self.lu, target_node)
6839 if not self.failover:
6840 result = self.rpc.call_instance_migratable(instance.primary_node,
6842 if result.fail_msg and self.fallback:
6843 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6845 self.failover = True
6847 result.Raise("Can't migrate, please use failover",
6848 prereq=True, ecode=errors.ECODE_STATE)
6850 assert not (self.failover and self.cleanup)
6852 if not self.failover:
6853 if self.lu.op.live is not None and self.lu.op.mode is not None:
6854 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6855 " parameters are accepted",
6857 if self.lu.op.live is not None:
6859 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6861 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6862 # reset the 'live' parameter to None so that repeated
6863 # invocations of CheckPrereq do not raise an exception
6864 self.lu.op.live = None
6865 elif self.lu.op.mode is None:
6866 # read the default value from the hypervisor
6867 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6869 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6871 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6873 # Failover is never live
6876 def _RunAllocator(self):
6877 """Run the allocator based on input opcode.
6880 ial = IAllocator(self.cfg, self.rpc,
6881 mode=constants.IALLOCATOR_MODE_RELOC,
6882 name=self.instance_name,
6883 # TODO See why hail breaks with a single node below
6884 relocate_from=[self.instance.primary_node,
6885 self.instance.primary_node],
6888 ial.Run(self.lu.op.iallocator)
6891 raise errors.OpPrereqError("Can't compute nodes using"
6892 " iallocator '%s': %s" %
6893 (self.lu.op.iallocator, ial.info),
6895 if len(ial.result) != ial.required_nodes:
6896 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6897 " of nodes (%s), required %s" %
6898 (self.lu.op.iallocator, len(ial.result),
6899 ial.required_nodes), errors.ECODE_FAULT)
6900 self.target_node = ial.result[0]
6901 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6902 self.instance_name, self.lu.op.iallocator,
6903 utils.CommaJoin(ial.result))
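# Result shape sketch (node name is hypothetical): for a relocation request
# the allocator must return exactly ial.required_nodes node names, and only
# the first entry is used as the migration target.
#
#   >>> ial.result
#   ['node3.example.com']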
6905 def _WaitUntilSync(self):
6906 """Poll with custom rpc for disk sync.
6908 This uses our own step-based rpc call.
6911 self.feedback_fn("* wait until resync is done")
6915 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6917 self.instance.disks)
6919 for node, nres in result.items():
6920 nres.Raise("Cannot resync disks on node %s" % node)
6921 node_done, node_percent = nres.payload
6922 all_done = all_done and node_done
6923 if node_percent is not None:
6924 min_percent = min(min_percent, node_percent)
6926 if min_percent < 100:
6927 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6930 def _EnsureSecondary(self, node):
6931 """Demote a node to secondary.
6934 self.feedback_fn("* switching node %s to secondary mode" % node)
6936 for dev in self.instance.disks:
6937 self.cfg.SetDiskID(dev, node)
6939 result = self.rpc.call_blockdev_close(node, self.instance.name,
6940 self.instance.disks)
6941 result.Raise("Cannot change disk to secondary on node %s" % node)
6943 def _GoStandalone(self):
6944 """Disconnect from the network.
6947 self.feedback_fn("* changing into standalone mode")
6948 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6949 self.instance.disks)
6950 for node, nres in result.items():
6951 nres.Raise("Cannot disconnect disks node %s" % node)
6953 def _GoReconnect(self, multimaster):
6954 """Reconnect to the network.
6960 msg = "single-master"
6961 self.feedback_fn("* changing disks into %s mode" % msg)
6962 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6963 self.instance.disks,
6964 self.instance.name, multimaster)
6965 for node, nres in result.items():
6966 nres.Raise("Cannot change disks config on node %s" % node)
6968 def _ExecCleanup(self):
6969 """Try to cleanup after a failed migration.
6971 The cleanup is done by:
6972 - check that the instance is running only on one node
6973 (and update the config if needed)
6974 - change disks on its secondary node to secondary
6975 - wait until disks are fully synchronized
6976 - disconnect from the network
6977 - change disks into single-master mode
6978 - wait again until disks are fully synchronized
6981 instance = self.instance
6982 target_node = self.target_node
6983 source_node = self.source_node
6985 # check running on only one node
6986 self.feedback_fn("* checking where the instance actually runs"
6987 " (if this hangs, the hypervisor might be in"
6989 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6990 for node, result in ins_l.items():
6991 result.Raise("Can't contact node %s" % node)
6993 runningon_source = instance.name in ins_l[source_node].payload
6994 runningon_target = instance.name in ins_l[target_node].payload
6996 if runningon_source and runningon_target:
6997 raise errors.OpExecError("Instance seems to be running on two nodes,"
6998 " or the hypervisor is confused; you will have"
6999 " to ensure manually that it runs only on one"
7000 " and restart this operation")
7002 if not (runningon_source or runningon_target):
7003 raise errors.OpExecError("Instance does not seem to be running at all;"
7004 " in this case it's safer to repair by"
7005 " running 'gnt-instance stop' to ensure disk"
7006 " shutdown, and then restarting it")
7008 if runningon_target:
7009 # the migration has actually succeeded, we need to update the config
7010 self.feedback_fn("* instance running on secondary node (%s),"
7011 " updating config" % target_node)
7012 instance.primary_node = target_node
7013 self.cfg.Update(instance, self.feedback_fn)
7014 demoted_node = source_node
7016 self.feedback_fn("* instance confirmed to be running on its"
7017 " primary node (%s)" % source_node)
7018 demoted_node = target_node
7020 if instance.disk_template in constants.DTS_INT_MIRROR:
7021 self._EnsureSecondary(demoted_node)
7023 self._WaitUntilSync()
7024 except errors.OpExecError:
7025 # we ignore errors here, since if the device is standalone, it
7026 # won't be able to sync
7028 self._GoStandalone()
7029 self._GoReconnect(False)
7030 self._WaitUntilSync()
7032 self.feedback_fn("* done")
7034 def _RevertDiskStatus(self):
7035 """Try to revert the disk status after a failed migration.
7038 target_node = self.target_node
7039 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7043 self._EnsureSecondary(target_node)
7044 self._GoStandalone()
7045 self._GoReconnect(False)
7046 self._WaitUntilSync()
7047 except errors.OpExecError, err:
7048 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7049 " please try to recover the instance manually;"
7050 " error '%s'" % str(err))
7052 def _AbortMigration(self):
7053 """Call the hypervisor code to abort a started migration.
7056 instance = self.instance
7057 target_node = self.target_node
7058 migration_info = self.migration_info
7060 abort_result = self.rpc.call_finalize_migration(target_node,
7064 abort_msg = abort_result.fail_msg
7066 logging.error("Aborting migration failed on target node %s: %s",
7067 target_node, abort_msg)
7068 # Don't raise an exception here, as we still have to try to revert the
7069 # disk status, even if this step failed.
7071 def _ExecMigration(self):
7072 """Migrate an instance.
7074 The migrate is done by:
7075 - change the disks into dual-master mode
7076 - wait until disks are fully synchronized again
7077 - migrate the instance
7078 - change disks on the new secondary node (the old primary) to secondary
7079 - wait until disks are fully synchronized
7080 - change disks into single-master mode
7083 instance = self.instance
7084 target_node = self.target_node
7085 source_node = self.source_node
7087 self.feedback_fn("* checking disk consistency between source and target")
7088 for dev in instance.disks:
7089 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7090 raise errors.OpExecError("Disk %s is degraded or not fully"
7091 " synchronized on target node,"
7092 " aborting migration" % dev.iv_name)
7094 # First get the migration information from the remote node
7095 result = self.rpc.call_migration_info(source_node, instance)
7096 msg = result.fail_msg
7098 log_err = ("Failed fetching source migration information from %s: %s" %
7100 logging.error(log_err)
7101 raise errors.OpExecError(log_err)
7103 self.migration_info = migration_info = result.payload
7105 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7106 # Then switch the disks to master/master mode
7107 self._EnsureSecondary(target_node)
7108 self._GoStandalone()
7109 self._GoReconnect(True)
7110 self._WaitUntilSync()
7112 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7113 result = self.rpc.call_accept_instance(target_node,
7116 self.nodes_ip[target_node])
7118 msg = result.fail_msg
7120 logging.error("Instance pre-migration failed, trying to revert"
7121 " disk status: %s", msg)
7122 self.feedback_fn("Pre-migration failed, aborting")
7123 self._AbortMigration()
7124 self._RevertDiskStatus()
7125 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7126 (instance.name, msg))
7128 self.feedback_fn("* migrating instance to %s" % target_node)
7129 result = self.rpc.call_instance_migrate(source_node, instance,
7130 self.nodes_ip[target_node],
7132 msg = result.fail_msg
7134 logging.error("Instance migration failed, trying to revert"
7135 " disk status: %s", msg)
7136 self.feedback_fn("Migration failed, aborting")
7137 self._AbortMigration()
7138 self._RevertDiskStatus()
7139 raise errors.OpExecError("Could not migrate instance %s: %s" %
7140 (instance.name, msg))
7142 instance.primary_node = target_node
7143 # distribute new instance config to the other nodes
7144 self.cfg.Update(instance, self.feedback_fn)
7146 result = self.rpc.call_finalize_migration(target_node,
7150 msg = result.fail_msg
7152 logging.error("Instance migration succeeded, but finalization failed:"
7154 raise errors.OpExecError("Could not finalize instance migration: %s" %
7157 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7158 self._EnsureSecondary(source_node)
7159 self._WaitUntilSync()
7160 self._GoStandalone()
7161 self._GoReconnect(False)
7162 self._WaitUntilSync()
7164 self.feedback_fn("* done")
7166 def _ExecFailover(self):
7167 """Failover an instance.
7169 The failover is done by shutting it down on its present node and
7170 starting it on the secondary.
7173 instance = self.instance
7174 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7176 source_node = instance.primary_node
7177 target_node = self.target_node
7179 if instance.admin_up:
7180 self.feedback_fn("* checking disk consistency between source and target")
7181 for dev in instance.disks:
7182 # for drbd, these are drbd over lvm
7183 if not _CheckDiskConsistency(self, dev, target_node, False):
7184 if not self.ignore_consistency:
7185 raise errors.OpExecError("Disk %s is degraded on target node,"
7186 " aborting failover" % dev.iv_name)
7188 self.feedback_fn("* not checking disk consistency as instance is not"
7191 self.feedback_fn("* shutting down instance on source node")
7192 logging.info("Shutting down instance %s on node %s",
7193 instance.name, source_node)
7195 result = self.rpc.call_instance_shutdown(source_node, instance,
7196 self.shutdown_timeout)
7197 msg = result.fail_msg
7199 if self.ignore_consistency or primary_node.offline:
7200 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7201 " proceeding anyway; please make sure node"
7202 " %s is down; error details: %s",
7203 instance.name, source_node, source_node, msg)
7205 raise errors.OpExecError("Could not shutdown instance %s on"
7207 (instance.name, source_node, msg))
7209 self.feedback_fn("* deactivating the instance's disks on source node")
7210 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7211 raise errors.OpExecError("Can't shut down the instance's disks.")
7213 instance.primary_node = target_node
7214 # distribute new instance config to the other nodes
7215 self.cfg.Update(instance, self.feedback_fn)
7217 # Only start the instance if it's marked as up
7218 if instance.admin_up:
7219 self.feedback_fn("* activating the instance's disks on target node")
7220 logging.info("Starting instance %s on node %s",
7221 instance.name, target_node)
7223 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7224 ignore_secondaries=True)
7226 _ShutdownInstanceDisks(self, instance)
7227 raise errors.OpExecError("Can't activate the instance's disks")
7229 self.feedback_fn("* starting the instance on the target node")
7230 result = self.rpc.call_instance_start(target_node, instance, None, None)
7231 msg = result.fail_msg
7233 _ShutdownInstanceDisks(self, instance)
7234 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7235 (instance.name, target_node, msg))
7237 def Exec(self, feedback_fn):
7238 """Perform the migration.
7241 self.feedback_fn = feedback_fn
7242 self.source_node = self.instance.primary_node
7244 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7245 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7246 self.target_node = self.instance.secondary_nodes[0]
7247 # Otherwise self.target_node has been populated either
7248 # directly, or through an iallocator.
7250 self.all_nodes = [self.source_node, self.target_node]
7252 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7253 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7257 feedback_fn("Failover instance %s" % self.instance.name)
7258 self._ExecFailover()
7260 feedback_fn("Migrating instance %s" % self.instance.name)
7263 return self._ExecCleanup()
7265 return self._ExecMigration()
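# Dispatch sketch: self.nodes_ip maps each involved node to its secondary IP
# (it is used for the DRBD network reconfiguration calls above), and exactly
# one of failover, cleanup or migration is then executed.  Example mapping
# with hypothetical addresses:
#
#   {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}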
7268 def _CreateBlockDev(lu, node, instance, device, force_create,
7270 """Create a tree of block devices on a given node.
7272 If this device type has to be created on secondaries, create it and all its children.
7275 If not, just recurse to children keeping the same 'force' value.
7277 @param lu: the lu on whose behalf we execute
7278 @param node: the node on which to create the device
7279 @type instance: L{objects.Instance}
7280 @param instance: the instance which owns the device
7281 @type device: L{objects.Disk}
7282 @param device: the device to create
7283 @type force_create: boolean
7284 @param force_create: whether to force creation of this device; this
7285 will be changed to True whenever we find a device whose
7286 CreateOnSecondary() method returns True
7287 @param info: the extra 'metadata' we should attach to the device
7288 (this will be represented as a LVM tag)
7289 @type force_open: boolean
7290 @param force_open: this parameter will be passed to the
7291 L{backend.BlockdevCreate} function where it specifies
7292 whether we run on primary or not, and it affects both
7293 the child assembly and the device's own Open() execution
7296 if device.CreateOnSecondary():
7300 for child in device.children:
7301 _CreateBlockDev(lu, node, instance, child, force_create,
7304 if not force_create:
7307 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7310 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7311 """Create a single block device on a given node.
7313 This will not recurse over children of the device, so they must be created in advance.
7316 @param lu: the lu on whose behalf we execute
7317 @param node: the node on which to create the device
7318 @type instance: L{objects.Instance}
7319 @param instance: the instance which owns the device
7320 @type device: L{objects.Disk}
7321 @param device: the device to create
7322 @param info: the extra 'metadata' we should attach to the device
7323 (this will be represented as a LVM tag)
7324 @type force_open: boolean
7325 @param force_open: this parameter will be passed to the
7326 L{backend.BlockdevCreate} function where it specifies
7327 whether we run on primary or not, and it affects both
7328 the child assembly and the device's own Open() execution
7331 lu.cfg.SetDiskID(device, node)
7332 result = lu.rpc.call_blockdev_create(node, device, device.size,
7333 instance.name, force_open, info)
7334 result.Raise("Can't create block device %s on"
7335 " node %s for instance %s" % (device, node, instance.name))
7336 if device.physical_id is None:
7337 device.physical_id = result.payload
7340 def _GenerateUniqueNames(lu, exts):
7341 """Generate a suitable LV name.
7343 This will generate a logical volume name for the given instance.
7348 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7349 results.append("%s%s" % (new_id, val))
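# Naming sketch (UUID is hypothetical): each requested extension is appended
# to a freshly generated unique ID, so exts like ".disk0" yield LV names of
# the form
#
#   "8f2cbee4-0d2a-4a56-a1fb-223344556677.disk0"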
7353 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7354 iv_name, p_minor, s_minor):
7355 """Generate a drbd8 device complete with its children.
7358 assert len(vgnames) == len(names) == 2
7359 port = lu.cfg.AllocatePort()
7360 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7361 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7362 logical_id=(vgnames[0], names[0]))
7363 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7364 logical_id=(vgnames[1], names[1]))
7365 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7366 logical_id=(primary, secondary, port,
7369 children=[dev_data, dev_meta],
7374 def _GenerateDiskTemplate(lu, template_name,
7375 instance_name, primary_node,
7376 secondary_nodes, disk_info,
7377 file_storage_dir, file_driver,
7378 base_index, feedback_fn):
7379 """Generate the entire disk layout for a given template type.
7382 #TODO: compute space requirements
7384 vgname = lu.cfg.GetVGName()
7385 disk_count = len(disk_info)
7387 if template_name == constants.DT_DISKLESS:
7389 elif template_name == constants.DT_PLAIN:
7390 if len(secondary_nodes) != 0:
7391 raise errors.ProgrammerError("Wrong template configuration")
7393 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7394 for i in range(disk_count)])
7395 for idx, disk in enumerate(disk_info):
7396 disk_index = idx + base_index
7397 vg = disk.get(constants.IDISK_VG, vgname)
7398 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7399 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7400 size=disk[constants.IDISK_SIZE],
7401 logical_id=(vg, names[idx]),
7402 iv_name="disk/%d" % disk_index,
7403 mode=disk[constants.IDISK_MODE])
7404 disks.append(disk_dev)
7405 elif template_name == constants.DT_DRBD8:
7406 if len(secondary_nodes) != 1:
7407 raise errors.ProgrammerError("Wrong template configuration")
7408 remote_node = secondary_nodes[0]
7409 minors = lu.cfg.AllocateDRBDMinor(
7410 [primary_node, remote_node] * len(disk_info), instance_name)
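# Minor-allocation sketch: one DRBD minor is requested per disk per node, in
# the order [primary, secondary, primary, secondary, ...], so for two disks
# the flat result pairs up as (values hypothetical)
#
#   minors = [0, 0, 1, 1]   # disk idx uses minors[2*idx] / minors[2*idx + 1]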
7413 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7414 for i in range(disk_count)]):
7415 names.append(lv_prefix + "_data")
7416 names.append(lv_prefix + "_meta")
7417 for idx, disk in enumerate(disk_info):
7418 disk_index = idx + base_index
7419 data_vg = disk.get(constants.IDISK_VG, vgname)
7420 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7421 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7422 disk[constants.IDISK_SIZE],
7424 names[idx * 2:idx * 2 + 2],
7425 "disk/%d" % disk_index,
7426 minors[idx * 2], minors[idx * 2 + 1])
7427 disk_dev.mode = disk[constants.IDISK_MODE]
7428 disks.append(disk_dev)
7429 elif template_name == constants.DT_FILE:
7430 if len(secondary_nodes) != 0:
7431 raise errors.ProgrammerError("Wrong template configuration")
7433 opcodes.RequireFileStorage()
7435 for idx, disk in enumerate(disk_info):
7436 disk_index = idx + base_index
7437 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7438 size=disk[constants.IDISK_SIZE],
7439 iv_name="disk/%d" % disk_index,
7440 logical_id=(file_driver,
7441 "%s/disk%d" % (file_storage_dir,
7443 mode=disk[constants.IDISK_MODE])
7444 disks.append(disk_dev)
7445 elif template_name == constants.DT_SHARED_FILE:
7446 if len(secondary_nodes) != 0:
7447 raise errors.ProgrammerError("Wrong template configuration")
7449 opcodes.RequireSharedFileStorage()
7451 for idx, disk in enumerate(disk_info):
7452 disk_index = idx + base_index
7453 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7454 size=disk[constants.IDISK_SIZE],
7455 iv_name="disk/%d" % disk_index,
7456 logical_id=(file_driver,
7457 "%s/disk%d" % (file_storage_dir,
7459 mode=disk[constants.IDISK_MODE])
7460 disks.append(disk_dev)
7461 elif template_name == constants.DT_BLOCK:
7462 if len(secondary_nodes) != 0:
7463 raise errors.ProgrammerError("Wrong template configuration")
7465 for idx, disk in enumerate(disk_info):
7466 disk_index = idx + base_index
7467 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7468 size=disk[constants.IDISK_SIZE],
7469 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7470 disk[constants.IDISK_ADOPT]),
7471 iv_name="disk/%d" % disk_index,
7472 mode=disk[constants.IDISK_MODE])
7473 disks.append(disk_dev)
7476 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
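# Shape of a single disk_info entry consumed above (values are hypothetical;
# only the keys actually read by this function are shown):
#
#   {constants.IDISK_SIZE: 10240,              # size in MiB
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg",              # optional, LVM-based templates
#    constants.IDISK_METAVG: "xenvg",          # optional, DRBD metadata VG
#    constants.IDISK_ADOPT: "/dev/sdb1"}       # only for DT_BLOCK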
7480 def _GetInstanceInfoText(instance):
7481 """Compute that text that should be added to the disk's metadata.
7484 return "originstname+%s" % instance.name
7487 def _CalcEta(time_taken, written, total_size):
7488 """Calculates the ETA based on size written and total size.
7490 @param time_taken: The time taken so far
7491 @param written: amount written so far
7492 @param total_size: The total size of data to be written
7493 @return: The remaining time in seconds
7496 avg_time = time_taken / float(written)
7497 return (total_size - written) * avg_time
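# Worked example with hypothetical numbers: if 200 MiB were written in 10
# seconds out of 1000 MiB total, the average time per MiB is 0.05 s, so the
# remaining 800 MiB are estimated to take 40 seconds.
#
#   >>> _CalcEta(10.0, 200, 1000)
#   40.0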
7500 def _WipeDisks(lu, instance):
7501 """Wipes instance disks.
7503 @type lu: L{LogicalUnit}
7504 @param lu: the logical unit on whose behalf we execute
7505 @type instance: L{objects.Instance}
7506 @param instance: the instance whose disks we should wipe
7507 @return: the success of the wipe
7510 node = instance.primary_node
7512 for device in instance.disks:
7513 lu.cfg.SetDiskID(device, node)
7515 logging.info("Pause sync of instance %s disks", instance.name)
7516 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7518 for idx, success in enumerate(result.payload):
7520 logging.warn("pause-sync of instance %s for disks %d failed",
7524 for idx, device in enumerate(instance.disks):
7525 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
7526 # at most MAX_WIPE_CHUNK
7527 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7528 constants.MIN_WIPE_CHUNK_PERCENT)
7529 # we _must_ make this an int, otherwise rounding errors will occur
7531 wipe_chunk_size = int(wipe_chunk_size)
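# Illustrative sizing (assuming MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB; the actual values live in constants.py): a
# 4096 MiB disk would be wiped in min(1024, 4096 * 0.10) = 409 MiB chunks,
# while a 100 GiB disk would be capped at 1024 MiB per chunk.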
7533 lu.LogInfo("* Wiping disk %d", idx)
7534 logging.info("Wiping disk %d for instance %s, node %s using"
7535 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7537 offset = 0
7538 size = device.size
7539 last_output = 0
7540 start_time = time.time()
7542 while offset < size:
7543 wipe_size = min(wipe_chunk_size, size - offset)
7544 logging.debug("Wiping disk %d, offset %s, chunk %s",
7545 idx, offset, wipe_size)
7546 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7547 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7548 (idx, offset, wipe_size))
7549 now = time.time()
7550 offset += wipe_size
7551 if now - last_output >= 60:
7552 eta = _CalcEta(now - start_time, offset, size)
7553 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7554 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7555 last_output = now
7557 logging.info("Resume sync of instance %s disks", instance.name)
7559 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7561 for idx, success in enumerate(result.payload):
7562 if not success:
7563 lu.LogWarning("Resume sync of disk %d failed, please have a"
7564 " look at the status and troubleshoot the issue", idx)
7565 logging.warn("resume-sync of instance %s for disks %d failed",
7566 instance.name, idx)
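# Note: _WipeDisks is invoked from LUInstanceCreate.Exec (below) when the
# cluster-wide prealloc_wipe_disks option is set and the disks were not
# adopted; failures there are logged and the disk setup is then treated as
# aborted (see the disk_abort handling in that method).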
7569 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7570 """Create all disks for an instance.
7572 This abstracts away some work from AddInstance.
7574 @type lu: L{LogicalUnit}
7575 @param lu: the logical unit on whose behalf we execute
7576 @type instance: L{objects.Instance}
7577 @param instance: the instance whose disks we should create
7578 @type to_skip: None or list
7579 @param to_skip: list of indices to skip
7580 @type target_node: string
7581 @param target_node: if passed, overrides the target node for creation
7583 @return: the success of the creation
7585 """
7586 info = _GetInstanceInfoText(instance)
7587 if target_node is None:
7588 pnode = instance.primary_node
7589 all_nodes = instance.all_nodes
7590 else:
7591 pnode = target_node
7592 all_nodes = [pnode]
7594 if instance.disk_template in constants.DTS_FILEBASED:
7595 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7596 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7598 result.Raise("Failed to create directory '%s' on"
7599 " node %s" % (file_storage_dir, pnode))
7601 # Note: this needs to be kept in sync with adding of disks in
7602 # LUInstanceSetParams
7603 for idx, device in enumerate(instance.disks):
7604 if to_skip and idx in to_skip:
7605 continue
7606 logging.info("Creating volume %s for instance %s",
7607 device.iv_name, instance.name)
7609 for node in all_nodes:
7610 f_create = node == pnode
7611 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7614 def _RemoveDisks(lu, instance, target_node=None):
7615 """Remove all disks for an instance.
7617 This abstracts away some work from `AddInstance()` and
7618 `RemoveInstance()`. Note that in case some of the devices couldn't
7619 be removed, the removal will continue with the other ones (compare
7620 with `_CreateDisks()`).
7622 @type lu: L{LogicalUnit}
7623 @param lu: the logical unit on whose behalf we execute
7624 @type instance: L{objects.Instance}
7625 @param instance: the instance whose disks we should remove
7626 @type target_node: string
7627 @param target_node: used to override the node on which to remove the disks
7629 @return: the success of the removal
7631 """
7632 logging.info("Removing block devices for instance %s", instance.name)
7634 all_result = True
7635 for device in instance.disks:
7636 if target_node:
7637 edata = [(target_node, device)]
7638 else:
7639 edata = device.ComputeNodeTree(instance.primary_node)
7640 for node, disk in edata:
7641 lu.cfg.SetDiskID(disk, node)
7642 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7643 if msg:
7644 lu.LogWarning("Could not remove block device %s on node %s,"
7645 " continuing anyway: %s", device.iv_name, node, msg)
7646 all_result = False
7648 if instance.disk_template == constants.DT_FILE:
7649 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7650 if target_node:
7651 tgt = target_node
7652 else:
7653 tgt = instance.primary_node
7654 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7655 if result.fail_msg:
7656 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7657 file_storage_dir, instance.primary_node, result.fail_msg)
7658 all_result = False
7660 return all_result
7663 def _ComputeDiskSizePerVG(disk_template, disks):
7664 """Compute disk size requirements in the volume group
7667 def _compute(disks, payload):
7668 """Universal algorithm.
7673 vgs[disk[constants.IDISK_VG]] = \
7674 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7678 # Required free disk space as a function of disk and swap space
7679 req_size_dict = {
7680 constants.DT_DISKLESS: {},
7681 constants.DT_PLAIN: _compute(disks, 0),
7682 # 128 MB are added for drbd metadata for each disk
7683 constants.DT_DRBD8: _compute(disks, 128),
7684 constants.DT_FILE: {},
7685 constants.DT_SHARED_FILE: {},
7686 }
7688 if disk_template not in req_size_dict:
7689 raise errors.ProgrammerError("Disk template '%s' size requirement"
7690 " is unknown" % disk_template)
7692 return req_size_dict[disk_template]
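# Illustrative example (hypothetical VG names): for two DRBD8 disks of
# 1024 MiB in "xenvg" and 2048 MiB in "othervg", the result would be
# {"xenvg": 1024 + 128, "othervg": 2048 + 128}, i.e. each disk plus the
# 128 MiB of DRBD metadata, grouped per volume group.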
7695 def _ComputeDiskSize(disk_template, disks):
7696 """Compute disk size requirements in the volume group
7699 # Required free disk space as a function of disk and swap space
7700 req_size_dict = {
7701 constants.DT_DISKLESS: None,
7702 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7703 # 128 MB are added for drbd metadata for each disk
7704 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7705 constants.DT_FILE: None,
7706 constants.DT_SHARED_FILE: 0,
7707 constants.DT_BLOCK: 0,
7708 }
7710 if disk_template not in req_size_dict:
7711 raise errors.ProgrammerError("Disk template '%s' size requirement"
7712 " is unknown" % disk_template)
7714 return req_size_dict[disk_template]
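# Illustrative example: two disks of 1024 and 2048 MiB under DT_DRBD8 yield
# 1024 + 128 + 2048 + 128 = 3328 MiB, under DT_PLAIN simply 3072 MiB, and
# None or 0 for the templates that do not consume LVM space.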
7717 def _FilterVmNodes(lu, nodenames):
7718 """Filters out non-vm_capable nodes from a list.
7720 @type lu: L{LogicalUnit}
7721 @param lu: the logical unit for which we check
7722 @type nodenames: list
7723 @param nodenames: the list of nodes on which we should check
7725 @return: the list of vm-capable nodes
7727 """
7728 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7729 return [name for name in nodenames if name not in vm_nodes]
7732 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7733 """Hypervisor parameter validation.
7735 This function abstracts the hypervisor parameter validation to be
7736 used in both instance create and instance modify.
7738 @type lu: L{LogicalUnit}
7739 @param lu: the logical unit for which we check
7740 @type nodenames: list
7741 @param nodenames: the list of nodes on which we should check
7742 @type hvname: string
7743 @param hvname: the name of the hypervisor we should use
7744 @type hvparams: dict
7745 @param hvparams: the parameters which we need to check
7746 @raise errors.OpPrereqError: if the parameters are not valid
7748 """
7749 nodenames = _FilterVmNodes(lu, nodenames)
7750 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7751 hvname,
7752 hvparams)
7753 for node in nodenames:
7754 info = hvinfo[node]
7755 if info.offline:
7756 continue
7757 info.Raise("Hypervisor parameter validation failed on node %s" % node)
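# Typical call site (see LUInstanceCreate.CheckPrereq below):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)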
7760 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7761 """OS parameters validation.
7763 @type lu: L{LogicalUnit}
7764 @param lu: the logical unit for which we check
7765 @type required: boolean
7766 @param required: whether the validation should fail if the OS is not
7767 found
7768 @type nodenames: list
7769 @param nodenames: the list of nodes on which we should check
7770 @type osname: string
7771 @param osname: the name of the OS we should check
7772 @type osparams: dict
7773 @param osparams: the parameters which we need to check
7774 @raise errors.OpPrereqError: if the parameters are not valid
7776 """
7777 nodenames = _FilterVmNodes(lu, nodenames)
7778 result = lu.rpc.call_os_validate(required, nodenames, osname,
7779 [constants.OS_VALIDATE_PARAMETERS],
7780 osparams)
7781 for node, nres in result.items():
7782 # we don't check for offline cases since this should be run only
7783 # against the master node and/or an instance's nodes
7784 nres.Raise("OS Parameters validation failed on node %s" % node)
7785 if not nres.payload:
7786 lu.LogInfo("OS %s not found on node %s, validation skipped",
7787 osname, node)
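# Typical call site (see LUInstanceCreate.CheckPrereq below):
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)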
7790 class LUInstanceCreate(LogicalUnit):
7791 """Create an instance.
7794 HPATH = "instance-add"
7795 HTYPE = constants.HTYPE_INSTANCE
7798 def CheckArguments(self):
7799 """Check arguments.
7801 """
7802 # do not require name_check to ease forward/backward compatibility
7804 if self.op.no_install and self.op.start:
7805 self.LogInfo("No-installation mode selected, disabling startup")
7806 self.op.start = False
7807 # validate/normalize the instance name
7808 self.op.instance_name = \
7809 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7811 if self.op.ip_check and not self.op.name_check:
7812 # TODO: make the ip check more flexible and not depend on the name check
7813 raise errors.OpPrereqError("Cannot do IP address check without a name"
7814 " check", errors.ECODE_INVAL)
7816 # check nics' parameter names
7817 for nic in self.op.nics:
7818 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7820 # check disks. parameter names and consistent adopt/no-adopt strategy
7821 has_adopt = has_no_adopt = False
7822 for disk in self.op.disks:
7823 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7824 if constants.IDISK_ADOPT in disk:
7825 has_adopt = True
7826 else:
7827 has_no_adopt = True
7828 if has_adopt and has_no_adopt:
7829 raise errors.OpPrereqError("Either all disks are adopted or none is",
7830 errors.ECODE_INVAL)
7831 if has_adopt:
7832 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7833 raise errors.OpPrereqError("Disk adoption is not supported for the"
7834 " '%s' disk template" %
7835 self.op.disk_template,
7836 errors.ECODE_INVAL)
7837 if self.op.iallocator is not None:
7838 raise errors.OpPrereqError("Disk adoption not allowed with an"
7839 " iallocator script", errors.ECODE_INVAL)
7840 if self.op.mode == constants.INSTANCE_IMPORT:
7841 raise errors.OpPrereqError("Disk adoption not allowed for"
7842 " instance import", errors.ECODE_INVAL)
7843 else:
7844 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7845 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7846 " but no 'adopt' parameter given" %
7847 self.op.disk_template,
7848 errors.ECODE_INVAL)
7850 self.adopt_disks = has_adopt
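# Illustrative adoption request (hypothetical values): with the plain disk
# template a disk spec such as {constants.IDISK_ADOPT: "existing-lv",
# constants.IDISK_VG: "xenvg"} reuses an existing LV instead of creating a
# new one; with the blockdev template IDISK_ADOPT holds an absolute device
# path under constants.ADOPTABLE_BLOCKDEV_ROOT (see CheckPrereq below).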
7852 # instance name verification
7853 if self.op.name_check:
7854 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7855 self.op.instance_name = self.hostname1.name
7856 # used in CheckPrereq for ip ping check
7857 self.check_ip = self.hostname1.ip
7858 else:
7859 self.check_ip = None
7861 # file storage checks
7862 if (self.op.file_driver and
7863 not self.op.file_driver in constants.FILE_DRIVER):
7864 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7865 self.op.file_driver, errors.ECODE_INVAL)
7867 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7868 raise errors.OpPrereqError("File storage directory path not absolute",
7869 errors.ECODE_INVAL)
7871 ### Node/iallocator related checks
7872 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7874 if self.op.pnode is not None:
7875 if self.op.disk_template in constants.DTS_INT_MIRROR:
7876 if self.op.snode is None:
7877 raise errors.OpPrereqError("The networked disk templates need"
7878 " a mirror node", errors.ECODE_INVAL)
7879 elif self.op.snode:
7880 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7881 " template")
7882 self.op.snode = None
7884 self._cds = _GetClusterDomainSecret()
7886 if self.op.mode == constants.INSTANCE_IMPORT:
7887 # On import force_variant must be True, because if we forced it at
7888 # initial install, our only chance when importing it back is that it
7889 # works again!
7890 self.op.force_variant = True
7892 if self.op.no_install:
7893 self.LogInfo("No-installation mode has no effect during import")
7895 elif self.op.mode == constants.INSTANCE_CREATE:
7896 if self.op.os_type is None:
7897 raise errors.OpPrereqError("No guest OS specified",
7898 errors.ECODE_INVAL)
7899 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7900 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7901 " installation" % self.op.os_type,
7903 if self.op.disk_template is None:
7904 raise errors.OpPrereqError("No disk template specified",
7905 errors.ECODE_INVAL)
7907 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7908 # Check handshake to ensure both clusters have the same domain secret
7909 src_handshake = self.op.source_handshake
7910 if not src_handshake:
7911 raise errors.OpPrereqError("Missing source handshake",
7912 errors.ECODE_INVAL)
7914 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7915 src_handshake)
7916 if errmsg:
7917 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7918 errors.ECODE_INVAL)
7920 # Load and check source CA
7921 self.source_x509_ca_pem = self.op.source_x509_ca
7922 if not self.source_x509_ca_pem:
7923 raise errors.OpPrereqError("Missing source X509 CA",
7924 errors.ECODE_INVAL)
7926 try:
7927 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7928 self._cds)
7929 except OpenSSL.crypto.Error, err:
7930 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7931 (err, ), errors.ECODE_INVAL)
7933 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7934 if errcode is not None:
7935 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7936 errors.ECODE_INVAL)
7938 self.source_x509_ca = cert
7940 src_instance_name = self.op.source_instance_name
7941 if not src_instance_name:
7942 raise errors.OpPrereqError("Missing source instance name",
7943 errors.ECODE_INVAL)
7945 self.source_instance_name = \
7946 netutils.GetHostname(name=src_instance_name).name
7948 else:
7949 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7950 self.op.mode, errors.ECODE_INVAL)
7952 def ExpandNames(self):
7953 """ExpandNames for CreateInstance.
7955 Figure out the right locks for instance creation.
7957 """
7958 self.needed_locks = {}
7960 instance_name = self.op.instance_name
7961 # this is just a preventive check, but someone might still add this
7962 # instance in the meantime, and creation will fail at lock-add time
7963 if instance_name in self.cfg.GetInstanceList():
7964 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7965 instance_name, errors.ECODE_EXISTS)
7967 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7969 if self.op.iallocator:
7970 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7971 else:
7972 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7973 nodelist = [self.op.pnode]
7974 if self.op.snode is not None:
7975 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7976 nodelist.append(self.op.snode)
7977 self.needed_locks[locking.LEVEL_NODE] = nodelist
7979 # in case of import lock the source node too
7980 if self.op.mode == constants.INSTANCE_IMPORT:
7981 src_node = self.op.src_node
7982 src_path = self.op.src_path
7984 if src_path is None:
7985 self.op.src_path = src_path = self.op.instance_name
7987 if src_node is None:
7988 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7989 self.op.src_node = None
7990 if os.path.isabs(src_path):
7991 raise errors.OpPrereqError("Importing an instance from an absolute"
7992 " path requires a source node option",
7995 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7996 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7997 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7998 if not os.path.isabs(src_path):
7999 self.op.src_path = src_path = \
8000 utils.PathJoin(constants.EXPORT_DIR, src_path)
8002 def _RunAllocator(self):
8003 """Run the allocator based on input opcode.
8005 """
8006 nics = [n.ToDict() for n in self.nics]
8007 ial = IAllocator(self.cfg, self.rpc,
8008 mode=constants.IALLOCATOR_MODE_ALLOC,
8009 name=self.op.instance_name,
8010 disk_template=self.op.disk_template,
8011 tags=self.op.tags,
8012 os=self.op.os_type,
8013 vcpus=self.be_full[constants.BE_VCPUS],
8014 memory=self.be_full[constants.BE_MEMORY],
8015 disks=self.disks,
8016 nics=nics,
8017 hypervisor=self.op.hypervisor,
8018 )
8020 ial.Run(self.op.iallocator)
8022 if not ial.success:
8023 raise errors.OpPrereqError("Can't compute nodes using"
8024 " iallocator '%s': %s" %
8025 (self.op.iallocator, ial.info),
8027 if len(ial.result) != ial.required_nodes:
8028 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8029 " of nodes (%s), required %s" %
8030 (self.op.iallocator, len(ial.result),
8031 ial.required_nodes), errors.ECODE_FAULT)
8032 self.op.pnode = ial.result[0]
8033 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8034 self.op.instance_name, self.op.iallocator,
8035 utils.CommaJoin(ial.result))
8036 if ial.required_nodes == 2:
8037 self.op.snode = ial.result[1]
8039 def BuildHooksEnv(self):
8040 """Build hooks env.
8042 This runs on master, primary and secondary nodes of the instance.
8044 """
8045 env = {
8046 "ADD_MODE": self.op.mode,
8047 }
8048 if self.op.mode == constants.INSTANCE_IMPORT:
8049 env["SRC_NODE"] = self.op.src_node
8050 env["SRC_PATH"] = self.op.src_path
8051 env["SRC_IMAGES"] = self.src_images
8053 env.update(_BuildInstanceHookEnv(
8054 name=self.op.instance_name,
8055 primary_node=self.op.pnode,
8056 secondary_nodes=self.secondaries,
8057 status=self.op.start,
8058 os_type=self.op.os_type,
8059 memory=self.be_full[constants.BE_MEMORY],
8060 vcpus=self.be_full[constants.BE_VCPUS],
8061 nics=_NICListToTuple(self, self.nics),
8062 disk_template=self.op.disk_template,
8063 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8064 for d in self.disks],
8067 hypervisor_name=self.op.hypervisor,
8069 ))
8071 return env
8073 def BuildHooksNodes(self):
8074 """Build hooks nodes.
8077 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8080 def _ReadExportInfo(self):
8081 """Reads the export information from disk.
8083 It will override the opcode source node and path with the actual
8084 information, if these two were not specified before.
8086 @return: the export information
8088 """
8089 assert self.op.mode == constants.INSTANCE_IMPORT
8091 src_node = self.op.src_node
8092 src_path = self.op.src_path
8094 if src_node is None:
8095 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8096 exp_list = self.rpc.call_export_list(locked_nodes)
8097 found = False
8098 for node in exp_list:
8099 if exp_list[node].fail_msg:
8100 continue
8101 if src_path in exp_list[node].payload:
8102 found = True
8103 self.op.src_node = src_node = node
8104 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8105 src_path)
8106 break
8107 if not found:
8108 raise errors.OpPrereqError("No export found for relative path %s" %
8109 src_path, errors.ECODE_INVAL)
8111 _CheckNodeOnline(self, src_node)
8112 result = self.rpc.call_export_info(src_node, src_path)
8113 result.Raise("No export or invalid export found in dir %s" % src_path)
8115 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8116 if not export_info.has_section(constants.INISECT_EXP):
8117 raise errors.ProgrammerError("Corrupted export config",
8118 errors.ECODE_ENVIRON)
8120 ei_version = export_info.get(constants.INISECT_EXP, "version")
8121 if (int(ei_version) != constants.EXPORT_VERSION):
8122 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8123 (ei_version, constants.EXPORT_VERSION),
8124 errors.ECODE_ENVIRON)
8126 return export_info
8127 def _ReadExportParams(self, einfo):
8128 """Use export parameters as defaults.
8130 In case the opcode doesn't specify (as in override) some instance
8131 parameters, then try to use them from the export information, if
8132 that declares them.
8134 """
8135 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8137 if self.op.disk_template is None:
8138 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8139 self.op.disk_template = einfo.get(constants.INISECT_INS,
8140 "disk_template")
8141 else:
8142 raise errors.OpPrereqError("No disk template specified and the export"
8143 " is missing the disk_template information",
8146 if not self.op.disks:
8147 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8148 disks = []
8149 # TODO: import the disk iv_name too
8150 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8151 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8152 disks.append({constants.IDISK_SIZE: disk_sz})
8153 self.op.disks = disks
8154 else:
8155 raise errors.OpPrereqError("No disk info specified and the export"
8156 " is missing the disk information",
8159 if (not self.op.nics and
8160 einfo.has_option(constants.INISECT_INS, "nic_count")):
8161 nics = []
8162 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8163 ndict = {}
8164 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8165 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8166 ndict[name] = v
8167 nics.append(ndict)
8168 self.op.nics = nics
8170 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8171 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8173 if (self.op.hypervisor is None and
8174 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8175 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8177 if einfo.has_section(constants.INISECT_HYP):
8178 # use the export parameters but do not override the ones
8179 # specified by the user
8180 for name, value in einfo.items(constants.INISECT_HYP):
8181 if name not in self.op.hvparams:
8182 self.op.hvparams[name] = value
8184 if einfo.has_section(constants.INISECT_BEP):
8185 # use the parameters, without overriding
8186 for name, value in einfo.items(constants.INISECT_BEP):
8187 if name not in self.op.beparams:
8188 self.op.beparams[name] = value
8189 else:
8190 # try to read the parameters old style, from the main section
8191 for name in constants.BES_PARAMETERS:
8192 if (name not in self.op.beparams and
8193 einfo.has_option(constants.INISECT_INS, name)):
8194 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8196 if einfo.has_section(constants.INISECT_OSP):
8197 # use the parameters, without overriding
8198 for name, value in einfo.items(constants.INISECT_OSP):
8199 if name not in self.op.osparams:
8200 self.op.osparams[name] = value
8202 def _RevertToDefaults(self, cluster):
8203 """Revert the instance parameters to the default values.
8207 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8208 for name in self.op.hvparams.keys():
8209 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8210 del self.op.hvparams[name]
8212 be_defs = cluster.SimpleFillBE({})
8213 for name in self.op.beparams.keys():
8214 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8215 del self.op.beparams[name]
8217 nic_defs = cluster.SimpleFillNIC({})
8218 for nic in self.op.nics:
8219 for name in constants.NICS_PARAMETERS:
8220 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8221 del nic[name]
8223 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8224 for name in self.op.osparams.keys():
8225 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8226 del self.op.osparams[name]
8228 def CheckPrereq(self):
8229 """Check prerequisites.
8232 if self.op.mode == constants.INSTANCE_IMPORT:
8233 export_info = self._ReadExportInfo()
8234 self._ReadExportParams(export_info)
8236 if (not self.cfg.GetVGName() and
8237 self.op.disk_template not in constants.DTS_NOT_LVM):
8238 raise errors.OpPrereqError("Cluster does not support lvm-based"
8239 " instances", errors.ECODE_STATE)
8241 if self.op.hypervisor is None:
8242 self.op.hypervisor = self.cfg.GetHypervisorType()
8244 cluster = self.cfg.GetClusterInfo()
8245 enabled_hvs = cluster.enabled_hypervisors
8246 if self.op.hypervisor not in enabled_hvs:
8247 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8248 " cluster (%s)" % (self.op.hypervisor,
8249 ",".join(enabled_hvs)),
8252 # Check tag validity
8253 for tag in self.op.tags:
8254 objects.TaggableObject.ValidateTag(tag)
8256 # check hypervisor parameter syntax (locally)
8257 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8258 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8259 self.op.hvparams)
8260 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8261 hv_type.CheckParameterSyntax(filled_hvp)
8262 self.hv_full = filled_hvp
8263 # check that we don't specify global parameters on an instance
8264 _CheckGlobalHvParams(self.op.hvparams)
8266 # fill and remember the beparams dict
8267 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8268 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8270 # build os parameters
8271 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8273 # now that hvp/bep are in final format, let's reset to defaults,
8275 if self.op.identify_defaults:
8276 self._RevertToDefaults(cluster)
8278 # NIC buildup
8279 self.nics = []
8280 for idx, nic in enumerate(self.op.nics):
8281 nic_mode_req = nic.get(constants.INIC_MODE, None)
8282 nic_mode = nic_mode_req
8283 if nic_mode is None:
8284 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8286 # in routed mode, for the first nic, the default ip is 'auto'
8287 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8288 default_ip_mode = constants.VALUE_AUTO
8289 else:
8290 default_ip_mode = constants.VALUE_NONE
8292 # ip validity checks
8293 ip = nic.get(constants.INIC_IP, default_ip_mode)
8294 if ip is None or ip.lower() == constants.VALUE_NONE:
8295 nic_ip = None
8296 elif ip.lower() == constants.VALUE_AUTO:
8297 if not self.op.name_check:
8298 raise errors.OpPrereqError("IP address set to auto but name checks"
8299 " have been skipped",
8301 nic_ip = self.hostname1.ip
8303 if not netutils.IPAddress.IsValid(ip):
8304 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8308 # TODO: check the ip address for uniqueness
8309 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8310 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8311 errors.ECODE_INVAL)
8313 # MAC address verification
8314 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8315 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8316 mac = utils.NormalizeAndValidateMac(mac)
8318 try:
8319 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8320 except errors.ReservationError:
8321 raise errors.OpPrereqError("MAC address %s already in use"
8322 " in cluster" % mac,
8323 errors.ECODE_NOTUNIQUE)
8325 # Build nic parameters
8326 link = nic.get(constants.INIC_LINK, None)
8327 nicparams = {}
8328 if nic_mode_req:
8329 nicparams[constants.NIC_MODE] = nic_mode_req
8330 if link:
8331 nicparams[constants.NIC_LINK] = link
8333 check_params = cluster.SimpleFillNIC(nicparams)
8334 objects.NIC.CheckParameterSyntax(check_params)
8335 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8337 # disk checks/pre-build
8338 default_vg = self.cfg.GetVGName()
8339 self.disks = []
8340 for disk in self.op.disks:
8341 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8342 if mode not in constants.DISK_ACCESS_SET:
8343 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8344 mode, errors.ECODE_INVAL)
8345 size = disk.get(constants.IDISK_SIZE, None)
8346 if size is None:
8347 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8348 try:
8349 size = int(size)
8350 except (TypeError, ValueError):
8351 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8352 errors.ECODE_INVAL)
8354 data_vg = disk.get(constants.IDISK_VG, default_vg)
8355 new_disk = {
8356 constants.IDISK_SIZE: size,
8357 constants.IDISK_MODE: mode,
8358 constants.IDISK_VG: data_vg,
8359 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8360 }
8361 if constants.IDISK_ADOPT in disk:
8362 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8363 self.disks.append(new_disk)
8365 if self.op.mode == constants.INSTANCE_IMPORT:
8367 # Check that the new instance doesn't have less disks than the export
8368 instance_disks = len(self.disks)
8369 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8370 if instance_disks < export_disks:
8371 raise errors.OpPrereqError("Not enough disks to import."
8372 " (instance: %d, export: %d)" %
8373 (instance_disks, export_disks),
8374 errors.ECODE_INVAL)
8376 disk_images = []
8377 for idx in range(export_disks):
8378 option = 'disk%d_dump' % idx
8379 if export_info.has_option(constants.INISECT_INS, option):
8380 # FIXME: are the old os-es, disk sizes, etc. useful?
8381 export_name = export_info.get(constants.INISECT_INS, option)
8382 image = utils.PathJoin(self.op.src_path, export_name)
8383 disk_images.append(image)
8384 else:
8385 disk_images.append(False)
8387 self.src_images = disk_images
8389 old_name = export_info.get(constants.INISECT_INS, 'name')
8390 try:
8391 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8392 except (TypeError, ValueError), err:
8393 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8394 " an integer: %s" % str(err),
8396 if self.op.instance_name == old_name:
8397 for idx, nic in enumerate(self.nics):
8398 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8399 nic_mac_ini = 'nic%d_mac' % idx
8400 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8402 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8404 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8405 if self.op.ip_check:
8406 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8407 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8408 (self.check_ip, self.op.instance_name),
8409 errors.ECODE_NOTUNIQUE)
8411 #### mac address generation
8412 # By generating here the mac address both the allocator and the hooks get
8413 # the real final mac address rather than the 'auto' or 'generate' value.
8414 # There is a race condition between the generation and the instance object
8415 # creation, which means that we know the mac is valid now, but we're not
8416 # sure it will be when we actually add the instance. If things go bad
8417 # adding the instance will abort because of a duplicate mac, and the
8418 # creation job will fail.
8419 for nic in self.nics:
8420 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8421 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8425 if self.op.iallocator is not None:
8426 self._RunAllocator()
8428 #### node related checks
8430 # check primary node
8431 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8432 assert self.pnode is not None, \
8433 "Cannot retrieve locked node %s" % self.op.pnode
8434 if pnode.offline:
8435 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8436 pnode.name, errors.ECODE_STATE)
8437 if pnode.drained:
8438 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8439 pnode.name, errors.ECODE_STATE)
8440 if not pnode.vm_capable:
8441 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8442 " '%s'" % pnode.name, errors.ECODE_STATE)
8444 self.secondaries = []
8446 # mirror node verification
8447 if self.op.disk_template in constants.DTS_INT_MIRROR:
8448 if self.op.snode == pnode.name:
8449 raise errors.OpPrereqError("The secondary node cannot be the"
8450 " primary node", errors.ECODE_INVAL)
8451 _CheckNodeOnline(self, self.op.snode)
8452 _CheckNodeNotDrained(self, self.op.snode)
8453 _CheckNodeVmCapable(self, self.op.snode)
8454 self.secondaries.append(self.op.snode)
8456 nodenames = [pnode.name] + self.secondaries
8458 if not self.adopt_disks:
8459 # Check lv size requirements, if not adopting
8460 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8461 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8463 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8464 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8465 disk[constants.IDISK_ADOPT])
8466 for disk in self.disks])
8467 if len(all_lvs) != len(self.disks):
8468 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8469 errors.ECODE_INVAL)
8470 for lv_name in all_lvs:
8471 try:
8472 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8473 # to ReserveLV use the same syntax
8474 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8475 except errors.ReservationError:
8476 raise errors.OpPrereqError("LV named %s used by another instance" %
8477 lv_name, errors.ECODE_NOTUNIQUE)
8479 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8480 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8482 node_lvs = self.rpc.call_lv_list([pnode.name],
8483 vg_names.payload.keys())[pnode.name]
8484 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8485 node_lvs = node_lvs.payload
8487 delta = all_lvs.difference(node_lvs.keys())
8488 if delta:
8489 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8490 utils.CommaJoin(delta),
8491 errors.ECODE_INVAL)
8492 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8493 if online_lvs:
8494 raise errors.OpPrereqError("Online logical volumes found, cannot"
8495 " adopt: %s" % utils.CommaJoin(online_lvs),
8496 errors.ECODE_STATE)
8497 # update the size of disk based on what is found
8498 for dsk in self.disks:
8499 dsk[constants.IDISK_SIZE] = \
8500 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8501 dsk[constants.IDISK_ADOPT])][0]))
8503 elif self.op.disk_template == constants.DT_BLOCK:
8504 # Normalize and de-duplicate device paths
8505 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8506 for disk in self.disks])
8507 if len(all_disks) != len(self.disks):
8508 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8509 errors.ECODE_INVAL)
8510 baddisks = [d for d in all_disks
8511 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8512 if baddisks:
8513 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8514 " cannot be adopted" %
8515 (", ".join(baddisks),
8516 constants.ADOPTABLE_BLOCKDEV_ROOT),
8517 errors.ECODE_INVAL)
8519 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8520 list(all_disks))[pnode.name]
8521 node_disks.Raise("Cannot get block device information from node %s" %
8522 pnode.name)
8523 node_disks = node_disks.payload
8524 delta = all_disks.difference(node_disks.keys())
8525 if delta:
8526 raise errors.OpPrereqError("Missing block device(s): %s" %
8527 utils.CommaJoin(delta),
8528 errors.ECODE_INVAL)
8529 for dsk in self.disks:
8530 dsk[constants.IDISK_SIZE] = \
8531 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8533 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8535 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8536 # check OS parameters (remotely)
8537 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8539 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8541 # memory check on primary node
8542 if self.op.start:
8543 _CheckNodeFreeMemory(self, self.pnode.name,
8544 "creating instance %s" % self.op.instance_name,
8545 self.be_full[constants.BE_MEMORY],
8546 self.op.hypervisor)
8548 self.dry_run_result = list(nodenames)
8550 def Exec(self, feedback_fn):
8551 """Create and add the instance to the cluster.
8554 instance = self.op.instance_name
8555 pnode_name = self.pnode.name
8557 ht_kind = self.op.hypervisor
8558 if ht_kind in constants.HTS_REQ_PORT:
8559 network_port = self.cfg.AllocatePort()
8560 else:
8561 network_port = None
8563 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8564 # this is needed because os.path.join does not accept None arguments
8565 if self.op.file_storage_dir is None:
8566 string_file_storage_dir = ""
8567 else:
8568 string_file_storage_dir = self.op.file_storage_dir
8570 # build the full file storage dir path
8571 if self.op.disk_template == constants.DT_SHARED_FILE:
8572 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8573 else:
8574 get_fsd_fn = self.cfg.GetFileStorageDir
8576 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8577 string_file_storage_dir, instance)
8578 else:
8579 file_storage_dir = ""
8581 disks = _GenerateDiskTemplate(self,
8582 self.op.disk_template,
8583 instance, pnode_name,
8584 self.secondaries,
8585 self.disks,
8586 file_storage_dir,
8587 self.op.file_driver,
8588 0,
8589 feedback_fn)
8591 iobj = objects.Instance(name=instance, os=self.op.os_type,
8592 primary_node=pnode_name,
8593 nics=self.nics, disks=disks,
8594 disk_template=self.op.disk_template,
8596 network_port=network_port,
8597 beparams=self.op.beparams,
8598 hvparams=self.op.hvparams,
8599 hypervisor=self.op.hypervisor,
8600 osparams=self.op.osparams,
8601 )
8604 for tag in self.op.tags:
8605 iobj.AddTag(tag)
8607 if self.adopt_disks:
8608 if self.op.disk_template == constants.DT_PLAIN:
8609 # rename LVs to the newly-generated names; we need to construct
8610 # 'fake' LV disks with the old data, plus the new unique_id
8611 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8612 rename_to = []
8613 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8614 rename_to.append(t_dsk.logical_id)
8615 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8616 self.cfg.SetDiskID(t_dsk, pnode_name)
8617 result = self.rpc.call_blockdev_rename(pnode_name,
8618 zip(tmp_disks, rename_to))
8619 result.Raise("Failed to rename adopted LVs")
8621 feedback_fn("* creating instance disks...")
8623 _CreateDisks(self, iobj)
8624 except errors.OpExecError:
8625 self.LogWarning("Device creation failed, reverting...")
8627 _RemoveDisks(self, iobj)
8629 self.cfg.ReleaseDRBDMinors(instance)
8632 feedback_fn("adding instance %s to cluster config" % instance)
8634 self.cfg.AddInstance(iobj, self.proc.GetECId())
8636 # Declare that we don't want to remove the instance lock anymore, as we've
8637 # added the instance to the config
8638 del self.remove_locks[locking.LEVEL_INSTANCE]
8640 if self.op.mode == constants.INSTANCE_IMPORT:
8641 # Release unused nodes
8642 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8643 else:
8644 # Release all nodes
8645 _ReleaseLocks(self, locking.LEVEL_NODE)
8647 disk_abort = False
8648 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8649 feedback_fn("* wiping instance disks...")
8650 try:
8651 _WipeDisks(self, iobj)
8652 except errors.OpExecError, err:
8653 logging.exception("Wiping disks failed")
8654 self.LogWarning("Wiping instance disks failed (%s)", err)
8655 disk_abort = True
8657 if disk_abort:
8658 # Something is already wrong with the disks, don't do anything else
8659 pass
8660 elif self.op.wait_for_sync:
8661 disk_abort = not _WaitForSync(self, iobj)
8662 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8663 # make sure the disks are not degraded (still sync-ing is ok)
8665 feedback_fn("* checking mirrors status")
8666 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8667 else:
8668 disk_abort = False
8670 if disk_abort:
8671 _RemoveDisks(self, iobj)
8672 self.cfg.RemoveInstance(iobj.name)
8673 # Make sure the instance lock gets removed
8674 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8675 raise errors.OpExecError("There are some degraded disks for"
8676 " this instance")
8678 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8679 if self.op.mode == constants.INSTANCE_CREATE:
8680 if not self.op.no_install:
8681 feedback_fn("* running the instance OS create scripts...")
8682 # FIXME: pass debug option from opcode to backend
8683 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8684 self.op.debug_level)
8685 result.Raise("Could not add os for instance %s"
8686 " on node %s" % (instance, pnode_name))
8688 elif self.op.mode == constants.INSTANCE_IMPORT:
8689 feedback_fn("* running the instance OS import scripts...")
8691 transfers = []
8693 for idx, image in enumerate(self.src_images):
8694 if not image:
8695 continue
8697 # FIXME: pass debug option from opcode to backend
8698 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8699 constants.IEIO_FILE, (image, ),
8700 constants.IEIO_SCRIPT,
8701 (iobj.disks[idx], idx),
8702 None)
8703 transfers.append(dt)
8705 import_result = \
8706 masterd.instance.TransferInstanceData(self, feedback_fn,
8707 self.op.src_node, pnode_name,
8708 self.pnode.secondary_ip,
8709 iobj, transfers)
8710 if not compat.all(import_result):
8711 self.LogWarning("Some disks for instance %s on node %s were not"
8712 " imported successfully" % (instance, pnode_name))
8714 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8715 feedback_fn("* preparing remote import...")
8716 # The source cluster will stop the instance before attempting to make a
8717 # connection. In some cases stopping an instance can take a long time,
8718 # hence the shutdown timeout is added to the connection timeout.
8719 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8720 self.op.source_shutdown_timeout)
8721 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8723 assert iobj.primary_node == self.pnode.name
8724 disk_results = \
8725 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8726 self.source_x509_ca,
8727 self._cds, timeouts)
8728 if not compat.all(disk_results):
8729 # TODO: Should the instance still be started, even if some disks
8730 # failed to import (valid for local imports, too)?
8731 self.LogWarning("Some disks for instance %s on node %s were not"
8732 " imported successfully" % (instance, pnode_name))
8734 # Run rename script on newly imported instance
8735 assert iobj.name == instance
8736 feedback_fn("Running rename script for %s" % instance)
8737 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8738 self.source_instance_name,
8739 self.op.debug_level)
8740 if result.fail_msg:
8741 self.LogWarning("Failed to run rename script for %s on node"
8742 " %s: %s" % (instance, pnode_name, result.fail_msg))
8744 else:
8745 # also checked in the prereq part
8746 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8747 % self.op.mode)
8749 if self.op.start:
8750 iobj.admin_up = True
8751 self.cfg.Update(iobj, feedback_fn)
8752 logging.info("Starting instance %s on node %s", instance, pnode_name)
8753 feedback_fn("* starting instance...")
8754 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8755 result.Raise("Could not start instance")
8757 return list(iobj.all_nodes)
8760 class LUInstanceConsole(NoHooksLU):
8761 """Connect to an instance's console.
8763 This is somewhat special in that it returns the command line that
8764 you need to run on the master node in order to connect to the
8765 console.
8767 """
8770 def ExpandNames(self):
8771 self._ExpandAndLockInstance()
8773 def CheckPrereq(self):
8774 """Check prerequisites.
8776 This checks that the instance is in the cluster.
8778 """
8779 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8780 assert self.instance is not None, \
8781 "Cannot retrieve locked instance %s" % self.op.instance_name
8782 _CheckNodeOnline(self, self.instance.primary_node)
8784 def Exec(self, feedback_fn):
8785 """Connect to the console of an instance
8788 instance = self.instance
8789 node = instance.primary_node
8791 node_insts = self.rpc.call_instance_list([node],
8792 [instance.hypervisor])[node]
8793 node_insts.Raise("Can't get node information from %s" % node)
8795 if instance.name not in node_insts.payload:
8796 if instance.admin_up:
8797 state = constants.INSTST_ERRORDOWN
8798 else:
8799 state = constants.INSTST_ADMINDOWN
8800 raise errors.OpExecError("Instance %s is not running (state %s)" %
8801 (instance.name, state))
8803 logging.debug("Connecting to console of %s on %s", instance.name, node)
8805 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8808 def _GetInstanceConsole(cluster, instance):
8809 """Returns console information for an instance.
8811 @type cluster: L{objects.Cluster}
8812 @type instance: L{objects.Instance}
8814 """
8816 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8817 # beparams and hvparams are passed separately, to avoid editing the
8818 # instance and then saving the defaults in the instance itself.
8819 hvparams = cluster.FillHV(instance)
8820 beparams = cluster.FillBE(instance)
8821 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8823 assert console.instance == instance.name
8824 assert console.Validate()
8826 return console.ToDict()
8829 class LUInstanceReplaceDisks(LogicalUnit):
8830 """Replace the disks of an instance.
8833 HPATH = "mirrors-replace"
8834 HTYPE = constants.HTYPE_INSTANCE
8835 REQ_BGL = False
8837 def CheckArguments(self):
8838 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8839 self.op.iallocator)
8841 def ExpandNames(self):
8842 self._ExpandAndLockInstance()
8844 assert locking.LEVEL_NODE not in self.needed_locks
8845 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8847 assert self.op.iallocator is None or self.op.remote_node is None, \
8848 "Conflicting options"
8850 if self.op.remote_node is not None:
8851 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8853 # Warning: do not remove the locking of the new secondary here
8854 # unless DRBD8.AddChildren is changed to work in parallel;
8855 # currently it doesn't since parallel invocations of
8856 # FindUnusedMinor will conflict
8857 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8858 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8859 else:
8860 self.needed_locks[locking.LEVEL_NODE] = []
8861 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8863 if self.op.iallocator is not None:
8864 # iallocator will select a new node in the same group
8865 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8867 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8868 self.op.iallocator, self.op.remote_node,
8869 self.op.disks, False, self.op.early_release)
8871 self.tasklets = [self.replacer]
8873 def DeclareLocks(self, level):
8874 if level == locking.LEVEL_NODEGROUP:
8875 assert self.op.remote_node is None
8876 assert self.op.iallocator is not None
8877 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8879 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8880 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8881 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8883 elif level == locking.LEVEL_NODE:
8884 if self.op.iallocator is not None:
8885 assert self.op.remote_node is None
8886 assert not self.needed_locks[locking.LEVEL_NODE]
8888 # Lock member nodes of all locked groups
8889 self.needed_locks[locking.LEVEL_NODE] = [node_name
8890 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8891 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8892 else:
8893 self._LockInstancesNodes()
8895 def BuildHooksEnv(self):
8896 """Build hooks env.
8898 This runs on the master, the primary and all the secondaries.
8900 """
8901 instance = self.replacer.instance
8903 "MODE": self.op.mode,
8904 "NEW_SECONDARY": self.op.remote_node,
8905 "OLD_SECONDARY": instance.secondary_nodes[0],
8907 env.update(_BuildInstanceHookEnvByObject(self, instance))
8908 return env
8910 def BuildHooksNodes(self):
8911 """Build hooks nodes.
8914 instance = self.replacer.instance
8916 self.cfg.GetMasterNode(),
8917 instance.primary_node,
8918 ]
8919 if self.op.remote_node is not None:
8920 nl.append(self.op.remote_node)
8922 return (nl, nl)
8923 def CheckPrereq(self):
8924 """Check prerequisites.
8927 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8928 self.op.iallocator is None)
8930 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8932 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8933 if owned_groups != groups:
8934 raise errors.OpExecError("Node groups used by instance '%s' changed"
8935 " since lock was acquired, current list is %r,"
8936 " used to be '%s'" %
8937 (self.op.instance_name,
8938 utils.CommaJoin(groups),
8939 utils.CommaJoin(owned_groups)))
8941 return LogicalUnit.CheckPrereq(self)
8944 class TLReplaceDisks(Tasklet):
8945 """Replaces disks for an instance.
8947 Note: Locking is not within the scope of this class.
8949 """
8950 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8951 disks, delay_iallocator, early_release):
8952 """Initializes this class.
8955 Tasklet.__init__(self, lu)
8958 self.instance_name = instance_name
8959 self.mode = mode
8960 self.iallocator_name = iallocator_name
8961 self.remote_node = remote_node
8962 self.disks = disks
8963 self.delay_iallocator = delay_iallocator
8964 self.early_release = early_release
8967 self.instance = None
8968 self.new_node = None
8969 self.target_node = None
8970 self.other_node = None
8971 self.remote_node_info = None
8972 self.node_secondary_ip = None
8974 @staticmethod
8975 def CheckArguments(mode, remote_node, iallocator):
8976 """Helper function for users of this class.
8979 # check for valid parameter combination
8980 if mode == constants.REPLACE_DISK_CHG:
8981 if remote_node is None and iallocator is None:
8982 raise errors.OpPrereqError("When changing the secondary either an"
8983 " iallocator script must be used or the"
8984 " new node given", errors.ECODE_INVAL)
8986 if remote_node is not None and iallocator is not None:
8987 raise errors.OpPrereqError("Give either the iallocator or the new"
8988 " secondary, not both", errors.ECODE_INVAL)
8990 elif remote_node is not None or iallocator is not None:
8991 # Not replacing the secondary
8992 raise errors.OpPrereqError("The iallocator and new node options can"
8993 " only be used when changing the"
8994 " secondary node", errors.ECODE_INVAL)
8996 @staticmethod
8997 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8998 """Compute a new secondary node using an IAllocator.
9001 ial = IAllocator(lu.cfg, lu.rpc,
9002 mode=constants.IALLOCATOR_MODE_RELOC,
9003 name=instance_name,
9004 relocate_from=relocate_from)
9006 ial.Run(iallocator_name)
9008 if not ial.success:
9009 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9010 " %s" % (iallocator_name, ial.info),
9013 if len(ial.result) != ial.required_nodes:
9014 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9015 " of nodes (%s), required %s" %
9016 (iallocator_name,
9017 len(ial.result), ial.required_nodes),
9018 errors.ECODE_FAULT)
9020 remote_node_name = ial.result[0]
9022 lu.LogInfo("Selected new secondary for instance '%s': %s",
9023 instance_name, remote_node_name)
9025 return remote_node_name
9027 def _FindFaultyDisks(self, node_name):
9028 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9029 node_name)
9031 def _CheckDisksActivated(self, instance):
9032 """Checks if the instance disks are activated.
9034 @param instance: The instance to check disks
9035 @return: True if they are activated, False otherwise
9037 """
9038 nodes = instance.all_nodes
9040 for idx, dev in enumerate(instance.disks):
9041 for node in nodes:
9042 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9043 self.cfg.SetDiskID(dev, node)
9045 result = self.rpc.call_blockdev_find(node, dev)
9047 if result.offline:
9048 continue
9049 elif result.fail_msg or not result.payload:
9050 return False
9052 return True
9054 def CheckPrereq(self):
9055 """Check prerequisites.
9057 This checks that the instance is in the cluster.
9059 """
9060 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9061 assert instance is not None, \
9062 "Cannot retrieve locked instance %s" % self.instance_name
9064 if instance.disk_template != constants.DT_DRBD8:
9065 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9066 " instances", errors.ECODE_INVAL)
9068 if len(instance.secondary_nodes) != 1:
9069 raise errors.OpPrereqError("The instance has a strange layout,"
9070 " expected one secondary but found %d" %
9071 len(instance.secondary_nodes),
9074 if not self.delay_iallocator:
9075 self._CheckPrereq2()
9077 def _CheckPrereq2(self):
9078 """Check prerequisites, second part.
9080 This function should always be part of CheckPrereq. It was separated and is
9081 now called from Exec because during node evacuation iallocator was only
9082 called with an unmodified cluster model, not taking planned changes into
9083 account.
9085 """
9086 instance = self.instance
9087 secondary_node = instance.secondary_nodes[0]
9089 if self.iallocator_name is None:
9090 remote_node = self.remote_node
9091 else:
9092 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9093 instance.name, instance.secondary_nodes)
9095 if remote_node is None:
9096 self.remote_node_info = None
9097 else:
9098 assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9099 "Remote node '%s' is not locked" % remote_node
9101 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9102 assert self.remote_node_info is not None, \
9103 "Cannot retrieve locked node %s" % remote_node
9105 if remote_node == self.instance.primary_node:
9106 raise errors.OpPrereqError("The specified node is the primary node of"
9107 " the instance", errors.ECODE_INVAL)
9109 if remote_node == secondary_node:
9110 raise errors.OpPrereqError("The specified node is already the"
9111 " secondary node of the instance",
9114 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9115 constants.REPLACE_DISK_CHG):
9116 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9117 errors.ECODE_INVAL)
9119 if self.mode == constants.REPLACE_DISK_AUTO:
9120 if not self._CheckDisksActivated(instance):
9121 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9122 " first" % self.instance_name,
9124 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9125 faulty_secondary = self._FindFaultyDisks(secondary_node)
9127 if faulty_primary and faulty_secondary:
9128 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9129 " one node and can not be repaired"
9130 " automatically" % self.instance_name,
9134 self.disks = faulty_primary
9135 self.target_node = instance.primary_node
9136 self.other_node = secondary_node
9137 check_nodes = [self.target_node, self.other_node]
9138 elif faulty_secondary:
9139 self.disks = faulty_secondary
9140 self.target_node = secondary_node
9141 self.other_node = instance.primary_node
9142 check_nodes = [self.target_node, self.other_node]
9148 # Non-automatic modes
9149 if self.mode == constants.REPLACE_DISK_PRI:
9150 self.target_node = instance.primary_node
9151 self.other_node = secondary_node
9152 check_nodes = [self.target_node, self.other_node]
9154 elif self.mode == constants.REPLACE_DISK_SEC:
9155 self.target_node = secondary_node
9156 self.other_node = instance.primary_node
9157 check_nodes = [self.target_node, self.other_node]
9159 elif self.mode == constants.REPLACE_DISK_CHG:
9160 self.new_node = remote_node
9161 self.other_node = instance.primary_node
9162 self.target_node = secondary_node
9163 check_nodes = [self.new_node, self.other_node]
9165 _CheckNodeNotDrained(self.lu, remote_node)
9166 _CheckNodeVmCapable(self.lu, remote_node)
9168 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9169 assert old_node_info is not None
9170 if old_node_info.offline and not self.early_release:
9171 # doesn't make sense to delay the release
9172 self.early_release = True
9173 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9174 " early-release mode", secondary_node)
9176 else:
9177 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9178 self.mode)
9180 # If not specified all disks should be replaced
9181 if not self.disks:
9182 self.disks = range(len(self.instance.disks))
9184 for node in check_nodes:
9185 _CheckNodeOnline(self.lu, node)
9187 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9188 self.other_node,
9189 self.target_node]
9190 if node_name is not None)
9192 # Release unneeded node locks
9193 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9195 # Release any owned node group
9196 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9197 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9199 # Check whether disks are valid
9200 for disk_idx in self.disks:
9201 instance.FindDisk(disk_idx)
9203 # Get secondary node IP addresses
9204 self.node_secondary_ip = \
9205 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9206 for node_name in touched_nodes)
9208 def Exec(self, feedback_fn):
9209 """Execute disk replacement.
9211 This dispatches the disk replacement to the appropriate handler.
9213 """
9214 if self.delay_iallocator:
9215 self._CheckPrereq2()
9218 # Verify owned locks before starting operation
9219 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9220 assert set(owned_locks) == set(self.node_secondary_ip), \
9221 ("Incorrect node locks, owning %s, expected %s" %
9222 (owned_locks, self.node_secondary_ip.keys()))
9224 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9225 assert list(owned_locks) == [self.instance_name], \
9226 "Instance '%s' not locked" % self.instance_name
9228 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9229 "Should not own any node group lock at this point"
9232 feedback_fn("No disks need replacement")
9235 feedback_fn("Replacing disk(s) %s for %s" %
9236 (utils.CommaJoin(self.disks), self.instance.name))
9238 activate_disks = (not self.instance.admin_up)
9240 # Activate the instance disks if we're replacing them on a down instance
9241 if activate_disks:
9242 _StartInstanceDisks(self.lu, self.instance, True)
9244 try:
9245 # Should we replace the secondary node?
9246 if self.new_node is not None:
9247 fn = self._ExecDrbd8Secondary
9248 else:
9249 fn = self._ExecDrbd8DiskOnly
9251 result = fn(feedback_fn)
9252 finally:
9253 # Deactivate the instance disks if we're replacing them on a
9254 # down instance
9255 if activate_disks:
9256 _SafeShutdownInstanceDisks(self.lu, self.instance)
9259 # Verify owned locks
9260 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9261 nodes = frozenset(self.node_secondary_ip)
9262 assert ((self.early_release and not owned_locks) or
9263 (not self.early_release and not (set(owned_locks) - nodes))), \
9264 ("Not owning the correct locks, early_release=%s, owned=%r,"
9265 " nodes=%r" % (self.early_release, owned_locks, nodes))
9269 def _CheckVolumeGroup(self, nodes):
9270 self.lu.LogInfo("Checking volume groups")
9272 vgname = self.cfg.GetVGName()
9274 # Make sure volume group exists on all involved nodes
9275 results = self.rpc.call_vg_list(nodes)
9276 if not results:
9277 raise errors.OpExecError("Can't list volume groups on the nodes")
9279 for node in nodes:
9280 res = results[node]
9281 res.Raise("Error checking node %s" % node)
9282 if vgname not in res.payload:
9283 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9284 (vgname, node))
9286 def _CheckDisksExistence(self, nodes):
9287 # Check disk existence
9288 for idx, dev in enumerate(self.instance.disks):
9289 if idx not in self.disks:
9290 continue
9292 for node in nodes:
9293 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9294 self.cfg.SetDiskID(dev, node)
9296 result = self.rpc.call_blockdev_find(node, dev)
9298 msg = result.fail_msg
9299 if msg or not result.payload:
9301 msg = "disk not found"
9302 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9305 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9306 for idx, dev in enumerate(self.instance.disks):
9307 if idx not in self.disks:
9308 continue
9310 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9311 (idx, node_name))
9313 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9314 ldisk=ldisk):
9315 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9316 " replace disks for instance %s" %
9317 (node_name, self.instance.name))
9319 def _CreateNewStorage(self, node_name):
9320 iv_names = {}
9322 for idx, dev in enumerate(self.instance.disks):
9323 if idx not in self.disks:
9324 continue
9326 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9328 self.cfg.SetDiskID(dev, node_name)
9330 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9331 names = _GenerateUniqueNames(self.lu, lv_names)
9333 vg_data = dev.children[0].logical_id[0]
9334 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9335 logical_id=(vg_data, names[0]))
9336 vg_meta = dev.children[1].logical_id[0]
9337 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9338 logical_id=(vg_meta, names[1]))
9340 new_lvs = [lv_data, lv_meta]
9341 old_lvs = dev.children
9342 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
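# Illustrative sketch (names assumed, not taken from a real config): for the
# first DRBD disk of an instance, the mapping built above typically ends up as
#
#   iv_names["disk/0"] = (drbd_dev,
#                         [old_data_lv, old_meta_lv],
#                         [new_data_lv, new_meta_lv])
#
# with the new LVs created from unique names such as "<uuid>.disk0_data" and
# "<uuid>.disk0_meta".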
9344 # we pass force_create=True to force the LVM creation
9345 for new_lv in new_lvs:
9346 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9347 _GetInstanceInfoText(self.instance), False)
9351 def _CheckDevices(self, node_name, iv_names):
9352 for name, (dev, _, _) in iv_names.iteritems():
9353 self.cfg.SetDiskID(dev, node_name)
9355 result = self.rpc.call_blockdev_find(node_name, dev)
9357 msg = result.fail_msg
9358 if msg or not result.payload:
9360 msg = "disk not found"
9361 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9364 if result.payload.is_degraded:
9365 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9367 def _RemoveOldStorage(self, node_name, iv_names):
9368 for name, (_, old_lvs, _) in iv_names.iteritems():
9369 self.lu.LogInfo("Remove logical volumes for %s" % name)
9372 self.cfg.SetDiskID(lv, node_name)
9374 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9376 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9377 hint="remove unused LVs manually")
9379 def _ExecDrbd8DiskOnly(self, feedback_fn):
9380 """Replace a disk on the primary or secondary for DRBD 8.
9382 The algorithm for replace is quite complicated:
9384 1. for each disk to be replaced:
9386 1. create new LVs on the target node with unique names
9387 1. detach old LVs from the drbd device
9388 1. rename old LVs to name_replaced.<time_t>
9389 1. rename new LVs to old LVs
9390 1. attach the new LVs (with the old names now) to the drbd device
9392 1. wait for sync across all devices
9394 1. for each modified disk:
9396 1. remove old LVs (which have the name name_replaced.<time_t>)
9398 Failures are not very well handled.
9403 # Step: check device activation
9404 self.lu.LogStep(1, steps_total, "Check device existence")
9405 self._CheckDisksExistence([self.other_node, self.target_node])
9406 self._CheckVolumeGroup([self.target_node, self.other_node])
9408 # Step: check other node consistency
9409 self.lu.LogStep(2, steps_total, "Check peer consistency")
9410 self._CheckDisksConsistency(self.other_node,
9411 self.other_node == self.instance.primary_node,
9414 # Step: create new storage
9415 self.lu.LogStep(3, steps_total, "Allocate new storage")
9416 iv_names = self._CreateNewStorage(self.target_node)
9418 # Step: for each lv, detach+rename*2+attach
9419 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9420 for dev, old_lvs, new_lvs in iv_names.itervalues():
9421 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9423 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9425 result.Raise("Can't detach drbd from local storage on node"
9426 " %s for device %s" % (self.target_node, dev.iv_name))
9428 #cfg.Update(instance)
9430 # ok, we created the new LVs, so now we know we have the needed
9431 # storage; as such, we proceed on the target node to rename
9432 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9433 # using the assumption that logical_id == physical_id (which in
9434 # turn is the unique_id on that node)
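# A minimal sketch of the two rename passes (LV names assumed for
# illustration): an old data LV "xenvg/inst1.disk0_data" is first renamed to
# "xenvg/inst1.disk0_data_replaced-<time_t>", after which the freshly created
# LV is renamed to the original "xenvg/inst1.disk0_data", so the DRBD device
# can later be re-attached to a child carrying the name it expects.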
9436 # FIXME(iustin): use a better name for the replaced LVs
9437 temp_suffix = int(time.time())
9438 ren_fn = lambda d, suff: (d.physical_id[0],
9439 d.physical_id[1] + "_replaced-%s" % suff)
9441 # Build the rename list based on what LVs exist on the node
9442 rename_old_to_new = []
9443 for to_ren in old_lvs:
9444 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9445 if not result.fail_msg and result.payload:
9447 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9449 self.lu.LogInfo("Renaming the old LVs on the target node")
9450 result = self.rpc.call_blockdev_rename(self.target_node,
9452 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9454 # Now we rename the new LVs to the old LVs
9455 self.lu.LogInfo("Renaming the new LVs on the target node")
9456 rename_new_to_old = [(new, old.physical_id)
9457 for old, new in zip(old_lvs, new_lvs)]
9458 result = self.rpc.call_blockdev_rename(self.target_node,
9460 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9462 for old, new in zip(old_lvs, new_lvs):
9463 new.logical_id = old.logical_id
9464 self.cfg.SetDiskID(new, self.target_node)
9466 for disk in old_lvs:
9467 disk.logical_id = ren_fn(disk, temp_suffix)
9468 self.cfg.SetDiskID(disk, self.target_node)
9470 # Now that the new lvs have the old name, we can add them to the device
9471 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9472 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9474 msg = result.fail_msg
9476 for new_lv in new_lvs:
9477 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9480 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9481 hint=("cleanup manually the unused logical"
9483 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9485 dev.children = new_lvs
9487 self.cfg.Update(self.instance, feedback_fn)
9490 if self.early_release:
9491 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9493 self._RemoveOldStorage(self.target_node, iv_names)
9494 # WARNING: we release both node locks here, do not do other RPCs
9495 # than WaitForSync to the primary node
9496 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9497 names=[self.target_node, self.other_node])
9500 # This can fail as the old devices are degraded and _WaitForSync
9501 # does a combined result over all disks, so we don't check its return value
9502 self.lu.LogStep(cstep, steps_total, "Sync devices")
9504 _WaitForSync(self.lu, self.instance)
9506 # Check all devices manually
9507 self._CheckDevices(self.instance.primary_node, iv_names)
9509 # Step: remove old storage
9510 if not self.early_release:
9511 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9513 self._RemoveOldStorage(self.target_node, iv_names)
9515 def _ExecDrbd8Secondary(self, feedback_fn):
9516 """Replace the secondary node for DRBD 8.
9518 The algorithm for replace is quite complicated:
9519 - for all disks of the instance:
9520 - create new LVs on the new node with same names
9521 - shutdown the drbd device on the old secondary
9522 - disconnect the drbd network on the primary
9523 - create the drbd device on the new secondary
9524 - network attach the drbd on the primary, using an artifice:
9525 the drbd code for Attach() will connect to the network if it
9526 finds a device which is connected to the good local disks but
9528 - wait for sync across all devices
9529 - remove all disks from the old secondary
9531 Failures are not very well handled.
9536 # Step: check device activation
9537 self.lu.LogStep(1, steps_total, "Check device existence")
9538 self._CheckDisksExistence([self.instance.primary_node])
9539 self._CheckVolumeGroup([self.instance.primary_node])
9541 # Step: check other node consistency
9542 self.lu.LogStep(2, steps_total, "Check peer consistency")
9543 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9545 # Step: create new storage
9546 self.lu.LogStep(3, steps_total, "Allocate new storage")
9547 for idx, dev in enumerate(self.instance.disks):
9548 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9549 (self.new_node, idx))
9550 # we pass force_create=True to force LVM creation
9551 for new_lv in dev.children:
9552 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9553 _GetInstanceInfoText(self.instance), False)
9555 # Step 4: drbd minors and drbd setup changes
9556 # after this, we must manually remove the drbd minors on both the
9557 # error and the success paths
9558 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9559 minors = self.cfg.AllocateDRBDMinor([self.new_node
9560 for dev in self.instance.disks],
9562 logging.debug("Allocated minors %r", minors)
9565 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9566 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9567 (self.new_node, idx))
9568 # create new devices on new_node; note that we create two IDs:
9569 # one without port, so the drbd will be activated without
9570 # networking information on the new node at this stage, and one
9571 # with network, for the later activation in step 4
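# For illustration only (hostnames, port and minor numbers assumed): a DRBD8
# logical_id is a 6-tuple, so the two IDs built below look roughly like
#
#   new_alone_id = ("node1.example.com", "node4.example.com", None,
#                   p_minor, new_minor, o_secret)
#   new_net_id   = ("node1.example.com", "node4.example.com", 11000,
#                   p_minor, new_minor, o_secret)
#
# i.e. identical except that the standalone variant carries no network port.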
9572 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9573 if self.instance.primary_node == o_node1:
9576 assert self.instance.primary_node == o_node2, "Three-node instance?"
9579 new_alone_id = (self.instance.primary_node, self.new_node, None,
9580 p_minor, new_minor, o_secret)
9581 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9582 p_minor, new_minor, o_secret)
9584 iv_names[idx] = (dev, dev.children, new_net_id)
9585 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9587 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9588 logical_id=new_alone_id,
9589 children=dev.children,
9592 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9593 _GetInstanceInfoText(self.instance), False)
9594 except errors.GenericError:
9595 self.cfg.ReleaseDRBDMinors(self.instance.name)
9598 # We have new devices, shutdown the drbd on the old secondary
9599 for idx, dev in enumerate(self.instance.disks):
9600 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9601 self.cfg.SetDiskID(dev, self.target_node)
9602 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9604 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9605 "node: %s" % (idx, msg),
9606 hint=("Please cleanup this device manually as"
9607 " soon as possible"))
9609 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9610 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9611 self.node_secondary_ip,
9612 self.instance.disks)\
9613 [self.instance.primary_node]
9615 msg = result.fail_msg
9617 # detaches didn't succeed (unlikely)
9618 self.cfg.ReleaseDRBDMinors(self.instance.name)
9619 raise errors.OpExecError("Can't detach the disks from the network on"
9620 " old node: %s" % (msg,))
9622 # if we managed to detach at least one, we update all the disks of
9623 # the instance to point to the new secondary
9624 self.lu.LogInfo("Updating instance configuration")
9625 for dev, _, new_logical_id in iv_names.itervalues():
9626 dev.logical_id = new_logical_id
9627 self.cfg.SetDiskID(dev, self.instance.primary_node)
9629 self.cfg.Update(self.instance, feedback_fn)
9631 # and now perform the drbd attach
9632 self.lu.LogInfo("Attaching primary drbds to new secondary"
9633 " (standalone => connected)")
9634 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9636 self.node_secondary_ip,
9637 self.instance.disks,
9640 for to_node, to_result in result.items():
9641 msg = to_result.fail_msg
9643 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9645 hint=("please do a gnt-instance info to see the"
9646 " status of disks"))
9648 if self.early_release:
9649 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9651 self._RemoveOldStorage(self.target_node, iv_names)
9652 # WARNING: we release all node locks here, do not do other RPCs
9653 # than WaitForSync to the primary node
9654 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9655 names=[self.instance.primary_node,
9660 # This can fail as the old devices are degraded and _WaitForSync
9661 # does a combined result over all disks, so we don't check its return value
9662 self.lu.LogStep(cstep, steps_total, "Sync devices")
9664 _WaitForSync(self.lu, self.instance)
9666 # Check all devices manually
9667 self._CheckDevices(self.instance.primary_node, iv_names)
9669 # Step: remove old storage
9670 if not self.early_release:
9671 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9672 self._RemoveOldStorage(self.target_node, iv_names)
9675 class LURepairNodeStorage(NoHooksLU):
9676 """Repairs the volume group on a node.
9681 def CheckArguments(self):
9682 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9684 storage_type = self.op.storage_type
9686 if (constants.SO_FIX_CONSISTENCY not in
9687 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9688 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9689 " repaired" % storage_type,
9692 def ExpandNames(self):
9693 self.needed_locks = {
9694 locking.LEVEL_NODE: [self.op.node_name],
9697 def _CheckFaultyDisks(self, instance, node_name):
9698 """Ensure faulty disks abort the opcode or at least warn."""
9700 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9702 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9703 " node '%s'" % (instance.name, node_name),
9705 except errors.OpPrereqError, err:
9706 if self.op.ignore_consistency:
9707 self.proc.LogWarning(str(err.args[0]))
9711 def CheckPrereq(self):
9712 """Check prerequisites.
9715 # Check whether any instance on this node has faulty disks
9716 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9717 if not inst.admin_up:
9719 check_nodes = set(inst.all_nodes)
9720 check_nodes.discard(self.op.node_name)
9721 for inst_node_name in check_nodes:
9722 self._CheckFaultyDisks(inst, inst_node_name)
9724 def Exec(self, feedback_fn):
9725 feedback_fn("Repairing storage unit '%s' on %s ..." %
9726 (self.op.name, self.op.node_name))
9728 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9729 result = self.rpc.call_storage_execute(self.op.node_name,
9730 self.op.storage_type, st_args,
9732 constants.SO_FIX_CONSISTENCY)
9733 result.Raise("Failed to repair storage unit '%s' on %s" %
9734 (self.op.name, self.op.node_name))
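# Hedged usage sketch (exact CLI syntax assumed): this LU backs the
# "gnt-node repair-storage" command, e.g.
#
#   gnt-node repair-storage node2.example.com lvm-vg xenvg
#
# which submits the corresponding repair opcode for the given node and
# storage unit.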
9737 class LUNodeEvacStrategy(NoHooksLU):
9738 """Computes the node evacuation strategy.
9743 def CheckArguments(self):
9744 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9746 def ExpandNames(self):
9747 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9748 self.needed_locks = locks = {}
9749 if self.op.remote_node is None:
9750 locks[locking.LEVEL_NODE] = locking.ALL_SET
9752 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9753 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9755 def Exec(self, feedback_fn):
9757 for node in self.op.nodes:
9758 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9762 if self.op.remote_node is not None:
9765 if i.primary_node == self.op.remote_node:
9766 raise errors.OpPrereqError("Node %s is the primary node of"
9767 " instance %s, cannot use it as"
9769 (self.op.remote_node, i.name),
9771 result.append([i.name, self.op.remote_node])
9773 ial = IAllocator(self.cfg, self.rpc,
9774 mode=constants.IALLOCATOR_MODE_MEVAC,
9775 evac_nodes=self.op.nodes)
9776 ial.Run(self.op.iallocator, validate=True)
9778 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9784 class LUInstanceGrowDisk(LogicalUnit):
9785 """Grow a disk of an instance.
9789 HTYPE = constants.HTYPE_INSTANCE
9792 def ExpandNames(self):
9793 self._ExpandAndLockInstance()
9794 self.needed_locks[locking.LEVEL_NODE] = []
9795 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9797 def DeclareLocks(self, level):
9798 if level == locking.LEVEL_NODE:
9799 self._LockInstancesNodes()
9801 def BuildHooksEnv(self):
9804 This runs on the master, the primary and all the secondaries.
9808 "DISK": self.op.disk,
9809 "AMOUNT": self.op.amount,
9811 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9814 def BuildHooksNodes(self):
9815 """Build hooks nodes.
9818 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9821 def CheckPrereq(self):
9822 """Check prerequisites.
9824 This checks that the instance is in the cluster.
9827 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9828 assert instance is not None, \
9829 "Cannot retrieve locked instance %s" % self.op.instance_name
9830 nodenames = list(instance.all_nodes)
9831 for node in nodenames:
9832 _CheckNodeOnline(self, node)
9834 self.instance = instance
9836 if instance.disk_template not in constants.DTS_GROWABLE:
9837 raise errors.OpPrereqError("Instance's disk layout does not support"
9838 " growing", errors.ECODE_INVAL)
9840 self.disk = instance.FindDisk(self.op.disk)
9842 if instance.disk_template not in (constants.DT_FILE,
9843 constants.DT_SHARED_FILE):
9844 # TODO: check the free disk space for file, when that feature will be
9846 _CheckNodesFreeDiskPerVG(self, nodenames,
9847 self.disk.ComputeGrowth(self.op.amount))
9849 def Exec(self, feedback_fn):
9850 """Execute disk grow.
9853 instance = self.instance
9856 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9858 raise errors.OpExecError("Cannot activate block device to grow")
9860 # First run all grow ops in dry-run mode
9861 for node in instance.all_nodes:
9862 self.cfg.SetDiskID(disk, node)
9863 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
9864 result.Raise("Grow request failed to node %s" % node)
9866 # We know that (as far as we can test) operations across different
9867 # nodes will succeed, time to run it for real
9868 for node in instance.all_nodes:
9869 self.cfg.SetDiskID(disk, node)
9870 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9871 result.Raise("Grow request failed to node %s" % node)
9873 # TODO: Rewrite code to work properly
9874 # DRBD goes into sync mode for a short amount of time after executing the
9875 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9876 # calling "resize" in sync mode fails. Sleeping for a short amount of
9877 # time is a work-around.
9880 disk.RecordGrow(self.op.amount)
9881 self.cfg.Update(instance, feedback_fn)
9882 if self.op.wait_for_sync:
9883 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9885 self.proc.LogWarning("Disk sync-ing has not returned a good"
9886 " status; please check the instance")
9887 if not instance.admin_up:
9888 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9889 elif not instance.admin_up:
9890 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9891 " not supposed to be running because no wait for"
9892 " sync mode was requested")
9895 class LUInstanceQueryData(NoHooksLU):
9896 """Query runtime instance data.
9901 def ExpandNames(self):
9902 self.needed_locks = {}
9904 # Use locking if requested or when non-static information is wanted
9905 if not (self.op.static or self.op.use_locking):
9906 self.LogWarning("Non-static data requested, locks need to be acquired")
9907 self.op.use_locking = True
9909 if self.op.instances or not self.op.use_locking:
9910 # Expand instance names right here
9911 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9913 # Will use acquired locks
9914 self.wanted_names = None
9916 if self.op.use_locking:
9917 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9919 if self.wanted_names is None:
9920 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9922 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9924 self.needed_locks[locking.LEVEL_NODE] = []
9925 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9926 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9928 def DeclareLocks(self, level):
9929 if self.op.use_locking and level == locking.LEVEL_NODE:
9930 self._LockInstancesNodes()
9932 def CheckPrereq(self):
9933 """Check prerequisites.
9935 This only checks the optional instance list against the existing names.
9938 if self.wanted_names is None:
9939 assert self.op.use_locking, "Locking was not used"
9940 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9942 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9943 for name in self.wanted_names]
9945 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9946 """Returns the status of a block device
9949 if self.op.static or not node:
9952 self.cfg.SetDiskID(dev, node)
9954 result = self.rpc.call_blockdev_find(node, dev)
9958 result.Raise("Can't compute disk status for %s" % instance_name)
9960 status = result.payload
9964 return (status.dev_path, status.major, status.minor,
9965 status.sync_percent, status.estimated_time,
9966 status.is_degraded, status.ldisk_status)
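# Purely illustrative (values assumed): the tuple returned above typically
# looks like
#
#   ("/dev/drbd0", 147, 0, 90.5, 30, False, constants.LDS_OKAY)
#
# i.e. device path, major and minor numbers, sync percentage, estimated time
# in seconds, the degraded flag and the local-disk status.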
9968 def _ComputeDiskStatus(self, instance, snode, dev):
9969 """Compute block device status.
9972 if dev.dev_type in constants.LDS_DRBD:
9973 # we change the snode then (otherwise we use the one passed in)
9974 if dev.logical_id[0] == instance.primary_node:
9975 snode = dev.logical_id[1]
9977 snode = dev.logical_id[0]
9979 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9981 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9984 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9985 for child in dev.children]
9990 "iv_name": dev.iv_name,
9991 "dev_type": dev.dev_type,
9992 "logical_id": dev.logical_id,
9993 "physical_id": dev.physical_id,
9994 "pstatus": dev_pstatus,
9995 "sstatus": dev_sstatus,
9996 "children": dev_children,
10001 def Exec(self, feedback_fn):
10002 """Gather and return data"""
10005 cluster = self.cfg.GetClusterInfo()
10007 for instance in self.wanted_instances:
10008 if not self.op.static:
10009 remote_info = self.rpc.call_instance_info(instance.primary_node,
10011 instance.hypervisor)
10012 remote_info.Raise("Error checking node %s" % instance.primary_node)
10013 remote_info = remote_info.payload
10014 if remote_info and "state" in remote_info:
10015 remote_state = "up"
10017 remote_state = "down"
10019 remote_state = None
10020 if instance.admin_up:
10021 config_state = "up"
10023 config_state = "down"
10025 disks = [self._ComputeDiskStatus(instance, None, device)
10026 for device in instance.disks]
10028 result[instance.name] = {
10029 "name": instance.name,
10030 "config_state": config_state,
10031 "run_state": remote_state,
10032 "pnode": instance.primary_node,
10033 "snodes": instance.secondary_nodes,
10035 # this happens to be the same format used for hooks
10036 "nics": _NICListToTuple(self, instance.nics),
10037 "disk_template": instance.disk_template,
10039 "hypervisor": instance.hypervisor,
10040 "network_port": instance.network_port,
10041 "hv_instance": instance.hvparams,
10042 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10043 "be_instance": instance.beparams,
10044 "be_actual": cluster.FillBE(instance),
10045 "os_instance": instance.osparams,
10046 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10047 "serial_no": instance.serial_no,
10048 "mtime": instance.mtime,
10049 "ctime": instance.ctime,
10050 "uuid": instance.uuid,
10056 class LUInstanceSetParams(LogicalUnit):
10057 """Modifies an instances's parameters.
10060 HPATH = "instance-modify"
10061 HTYPE = constants.HTYPE_INSTANCE
10064 def CheckArguments(self):
10065 if not (self.op.nics or self.op.disks or self.op.disk_template or
10066 self.op.hvparams or self.op.beparams or self.op.os_name):
10067 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10069 if self.op.hvparams:
10070 _CheckGlobalHvParams(self.op.hvparams)
10074 for disk_op, disk_dict in self.op.disks:
10075 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10076 if disk_op == constants.DDM_REMOVE:
10077 disk_addremove += 1
10079 elif disk_op == constants.DDM_ADD:
10080 disk_addremove += 1
10082 if not isinstance(disk_op, int):
10083 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10084 if not isinstance(disk_dict, dict):
10085 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10086 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10088 if disk_op == constants.DDM_ADD:
10089 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10090 if mode not in constants.DISK_ACCESS_SET:
10091 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10092 errors.ECODE_INVAL)
10093 size = disk_dict.get(constants.IDISK_SIZE, None)
10095 raise errors.OpPrereqError("Required disk parameter size missing",
10096 errors.ECODE_INVAL)
10099 except (TypeError, ValueError), err:
10100 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10101 str(err), errors.ECODE_INVAL)
10102 disk_dict[constants.IDISK_SIZE] = size
10104 # modification of disk
10105 if constants.IDISK_SIZE in disk_dict:
10106 raise errors.OpPrereqError("Disk size change not possible, use"
10107 " grow-disk", errors.ECODE_INVAL)
10109 if disk_addremove > 1:
10110 raise errors.OpPrereqError("Only one disk add or remove operation"
10111 " supported at a time", errors.ECODE_INVAL)
10113 if self.op.disks and self.op.disk_template is not None:
10114 raise errors.OpPrereqError("Disk template conversion and other disk"
10115 " changes not supported at the same time",
10116 errors.ECODE_INVAL)
10118 if (self.op.disk_template and
10119 self.op.disk_template in constants.DTS_INT_MIRROR and
10120 self.op.remote_node is None):
10121 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10122 " one requires specifying a secondary node",
10123 errors.ECODE_INVAL)
10127 for nic_op, nic_dict in self.op.nics:
10128 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10129 if nic_op == constants.DDM_REMOVE:
10132 elif nic_op == constants.DDM_ADD:
10135 if not isinstance(nic_op, int):
10136 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10137 if not isinstance(nic_dict, dict):
10138 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10139 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10141 # nic_dict should be a dict
10142 nic_ip = nic_dict.get(constants.INIC_IP, None)
10143 if nic_ip is not None:
10144 if nic_ip.lower() == constants.VALUE_NONE:
10145 nic_dict[constants.INIC_IP] = None
10147 if not netutils.IPAddress.IsValid(nic_ip):
10148 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10149 errors.ECODE_INVAL)
10151 nic_bridge = nic_dict.get('bridge', None)
10152 nic_link = nic_dict.get(constants.INIC_LINK, None)
10153 if nic_bridge and nic_link:
10154 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10155 " at the same time", errors.ECODE_INVAL)
10156 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10157 nic_dict['bridge'] = None
10158 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10159 nic_dict[constants.INIC_LINK] = None
10161 if nic_op == constants.DDM_ADD:
10162 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10163 if nic_mac is None:
10164 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10166 if constants.INIC_MAC in nic_dict:
10167 nic_mac = nic_dict[constants.INIC_MAC]
10168 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10169 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10171 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10172 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10173 " modifying an existing nic",
10174 errors.ECODE_INVAL)
10176 if nic_addremove > 1:
10177 raise errors.OpPrereqError("Only one NIC add or remove operation"
10178 " supported at a time", errors.ECODE_INVAL)
10180 def ExpandNames(self):
10181 self._ExpandAndLockInstance()
10182 self.needed_locks[locking.LEVEL_NODE] = []
10183 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10185 def DeclareLocks(self, level):
10186 if level == locking.LEVEL_NODE:
10187 self._LockInstancesNodes()
10188 if self.op.disk_template and self.op.remote_node:
10189 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10190 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10192 def BuildHooksEnv(self):
10193 """Build hooks env.
10195 This runs on the master, primary and secondaries.
10199 if constants.BE_MEMORY in self.be_new:
10200 args['memory'] = self.be_new[constants.BE_MEMORY]
10201 if constants.BE_VCPUS in self.be_new:
10202 args['vcpus'] = self.be_new[constants.BE_VCPUS]
10203 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10204 # information at all.
10207 nic_override = dict(self.op.nics)
10208 for idx, nic in enumerate(self.instance.nics):
10209 if idx in nic_override:
10210 this_nic_override = nic_override[idx]
10212 this_nic_override = {}
10213 if constants.INIC_IP in this_nic_override:
10214 ip = this_nic_override[constants.INIC_IP]
10217 if constants.INIC_MAC in this_nic_override:
10218 mac = this_nic_override[constants.INIC_MAC]
10221 if idx in self.nic_pnew:
10222 nicparams = self.nic_pnew[idx]
10224 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10225 mode = nicparams[constants.NIC_MODE]
10226 link = nicparams[constants.NIC_LINK]
10227 args['nics'].append((ip, mac, mode, link))
10228 if constants.DDM_ADD in nic_override:
10229 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10230 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10231 nicparams = self.nic_pnew[constants.DDM_ADD]
10232 mode = nicparams[constants.NIC_MODE]
10233 link = nicparams[constants.NIC_LINK]
10234 args['nics'].append((ip, mac, mode, link))
10235 elif constants.DDM_REMOVE in nic_override:
10236 del args['nics'][-1]
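# For illustration (addresses assumed): after this block args['nics'] is a
# list of (ip, mac, mode, link) tuples, e.g.
#
#   [("198.51.100.10", "aa:00:00:35:d1:2f", "bridged", "xen-br0")]
#
# which is then fed to _BuildInstanceHookEnvByObject below as an override for
# the per-NIC hook environment.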
10238 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10239 if self.op.disk_template:
10240 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10244 def BuildHooksNodes(self):
10245 """Build hooks nodes.
10248 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10251 def CheckPrereq(self):
10252 """Check prerequisites.
10254 This only checks the instance list against the existing names.
10257 # checking the new params on the primary/secondary nodes
10259 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10260 cluster = self.cluster = self.cfg.GetClusterInfo()
10261 assert self.instance is not None, \
10262 "Cannot retrieve locked instance %s" % self.op.instance_name
10263 pnode = instance.primary_node
10264 nodelist = list(instance.all_nodes)
10267 if self.op.os_name and not self.op.force:
10268 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10269 self.op.force_variant)
10270 instance_os = self.op.os_name
10272 instance_os = instance.os
10274 if self.op.disk_template:
10275 if instance.disk_template == self.op.disk_template:
10276 raise errors.OpPrereqError("Instance already has disk template %s" %
10277 instance.disk_template, errors.ECODE_INVAL)
10279 if (instance.disk_template,
10280 self.op.disk_template) not in self._DISK_CONVERSIONS:
10281 raise errors.OpPrereqError("Unsupported disk template conversion from"
10282 " %s to %s" % (instance.disk_template,
10283 self.op.disk_template),
10284 errors.ECODE_INVAL)
10285 _CheckInstanceDown(self, instance, "cannot change disk template")
10286 if self.op.disk_template in constants.DTS_INT_MIRROR:
10287 if self.op.remote_node == pnode:
10288 raise errors.OpPrereqError("Given new secondary node %s is the same"
10289 " as the primary node of the instance" %
10290 self.op.remote_node, errors.ECODE_STATE)
10291 _CheckNodeOnline(self, self.op.remote_node)
10292 _CheckNodeNotDrained(self, self.op.remote_node)
10293 # FIXME: here we assume that the old instance type is DT_PLAIN
10294 assert instance.disk_template == constants.DT_PLAIN
10295 disks = [{constants.IDISK_SIZE: d.size,
10296 constants.IDISK_VG: d.logical_id[0]}
10297 for d in instance.disks]
10298 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10299 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10301 # hvparams processing
10302 if self.op.hvparams:
10303 hv_type = instance.hypervisor
10304 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10305 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10306 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10309 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10310 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10311 self.hv_new = hv_new # the new actual values
10312 self.hv_inst = i_hvdict # the new dict (without defaults)
10314 self.hv_new = self.hv_inst = {}
10316 # beparams processing
10317 if self.op.beparams:
10318 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10320 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10321 be_new = cluster.SimpleFillBE(i_bedict)
10322 self.be_new = be_new # the new actual values
10323 self.be_inst = i_bedict # the new dict (without defaults)
10325 self.be_new = self.be_inst = {}
10326 be_old = cluster.FillBE(instance)
10328 # osparams processing
10329 if self.op.osparams:
10330 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10331 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10332 self.os_inst = i_osdict # the new dict (without defaults)
10338 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10339 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10340 mem_check_list = [pnode]
10341 if be_new[constants.BE_AUTO_BALANCE]:
10342 # either we changed auto_balance to yes or it was from before
10343 mem_check_list.extend(instance.secondary_nodes)
10344 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10345 instance.hypervisor)
10346 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10347 instance.hypervisor)
10348 pninfo = nodeinfo[pnode]
10349 msg = pninfo.fail_msg
10351 # Assume the primary node is unreachable and go ahead
10352 self.warn.append("Can't get info from primary node %s: %s" %
10354 elif not isinstance(pninfo.payload.get('memory_free', None), int):
10355 self.warn.append("Node data from primary node %s doesn't contain"
10356 " free memory information" % pnode)
10357 elif instance_info.fail_msg:
10358 self.warn.append("Can't get instance runtime information: %s" %
10359 instance_info.fail_msg)
10361 if instance_info.payload:
10362 current_mem = int(instance_info.payload['memory'])
10364 # Assume instance not running
10365 # (there is a slight race condition here, but it's not very probable,
10366 # and we have no other way to check)
10368 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10369 pninfo.payload['memory_free'])
10371 raise errors.OpPrereqError("This change will prevent the instance"
10372 " from starting, due to %d MB of memory"
10373 " missing on its primary node" % miss_mem,
10374 errors.ECODE_NORES)
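# Worked example (numbers assumed): raising BE_MEMORY to 4096 MB while the
# instance currently uses 1024 MB and the primary node reports 2048 MB free
# gives miss_mem = 4096 - 1024 - 2048 = 1024 MB, so the change is refused
# with ECODE_NORES.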
10376 if be_new[constants.BE_AUTO_BALANCE]:
10377 for node, nres in nodeinfo.items():
10378 if node not in instance.secondary_nodes:
10380 nres.Raise("Can't get info from secondary node %s" % node,
10381 prereq=True, ecode=errors.ECODE_STATE)
10382 if not isinstance(nres.payload.get('memory_free', None), int):
10383 raise errors.OpPrereqError("Secondary node %s didn't return free"
10384 " memory information" % node,
10385 errors.ECODE_STATE)
10386 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10387 raise errors.OpPrereqError("This change will prevent the instance"
10388 " from failover to its secondary node"
10389 " %s, due to not enough memory" % node,
10390 errors.ECODE_STATE)
10394 self.nic_pinst = {}
10395 for nic_op, nic_dict in self.op.nics:
10396 if nic_op == constants.DDM_REMOVE:
10397 if not instance.nics:
10398 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10399 errors.ECODE_INVAL)
10401 if nic_op != constants.DDM_ADD:
10403 if not instance.nics:
10404 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10405 " no NICs" % nic_op,
10406 errors.ECODE_INVAL)
10407 if nic_op < 0 or nic_op >= len(instance.nics):
10408 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10410 (nic_op, len(instance.nics) - 1),
10411 errors.ECODE_INVAL)
10412 old_nic_params = instance.nics[nic_op].nicparams
10413 old_nic_ip = instance.nics[nic_op].ip
10415 old_nic_params = {}
10418 update_params_dict = dict([(key, nic_dict[key])
10419 for key in constants.NICS_PARAMETERS
10420 if key in nic_dict])
10422 if 'bridge' in nic_dict:
10423 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10425 new_nic_params = _GetUpdatedParams(old_nic_params,
10426 update_params_dict)
10427 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10428 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10429 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10430 self.nic_pinst[nic_op] = new_nic_params
10431 self.nic_pnew[nic_op] = new_filled_nic_params
10432 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10434 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10435 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10436 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10438 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10440 self.warn.append(msg)
10442 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10443 if new_nic_mode == constants.NIC_MODE_ROUTED:
10444 if constants.INIC_IP in nic_dict:
10445 nic_ip = nic_dict[constants.INIC_IP]
10447 nic_ip = old_nic_ip
10449 raise errors.OpPrereqError('Cannot set the nic ip to None'
10450 ' on a routed nic', errors.ECODE_INVAL)
10451 if constants.INIC_MAC in nic_dict:
10452 nic_mac = nic_dict[constants.INIC_MAC]
10453 if nic_mac is None:
10454 raise errors.OpPrereqError('Cannot set the nic mac to None',
10455 errors.ECODE_INVAL)
10456 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10457 # otherwise generate the mac
10458 nic_dict[constants.INIC_MAC] = \
10459 self.cfg.GenerateMAC(self.proc.GetECId())
10461 # or validate/reserve the current one
10463 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10464 except errors.ReservationError:
10465 raise errors.OpPrereqError("MAC address %s already in use"
10466 " in cluster" % nic_mac,
10467 errors.ECODE_NOTUNIQUE)
10470 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10471 raise errors.OpPrereqError("Disk operations not supported for"
10472 " diskless instances",
10473 errors.ECODE_INVAL)
10474 for disk_op, _ in self.op.disks:
10475 if disk_op == constants.DDM_REMOVE:
10476 if len(instance.disks) == 1:
10477 raise errors.OpPrereqError("Cannot remove the last disk of"
10478 " an instance", errors.ECODE_INVAL)
10479 _CheckInstanceDown(self, instance, "cannot remove disks")
10481 if (disk_op == constants.DDM_ADD and
10482 len(instance.disks) >= constants.MAX_DISKS):
10483 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10484 " add more" % constants.MAX_DISKS,
10485 errors.ECODE_STATE)
10486 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10488 if disk_op < 0 or disk_op >= len(instance.disks):
10489 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10491 (disk_op, len(instance.disks)),
10492 errors.ECODE_INVAL)
10496 def _ConvertPlainToDrbd(self, feedback_fn):
10497 """Converts an instance from plain to drbd.
10500 feedback_fn("Converting template to drbd")
10501 instance = self.instance
10502 pnode = instance.primary_node
10503 snode = self.op.remote_node
10505 # create a fake disk info for _GenerateDiskTemplate
10506 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10507 constants.IDISK_VG: d.logical_id[0]}
10508 for d in instance.disks]
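# Illustrative only (size and VG name assumed): for a plain instance with a
# single 10 GiB disk, disk_info built above looks like
#
#   [{constants.IDISK_SIZE: 10240,
#     constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]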
10509 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10510 instance.name, pnode, [snode],
10511 disk_info, None, None, 0, feedback_fn)
10512 info = _GetInstanceInfoText(instance)
10513 feedback_fn("Creating aditional volumes...")
10514 # first, create the missing data and meta devices
10515 for disk in new_disks:
10516 # unfortunately this is... not too nice
10517 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10519 for child in disk.children:
10520 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10521 # at this stage, all new LVs have been created, we can rename the
10523 feedback_fn("Renaming original volumes...")
10524 rename_list = [(o, n.children[0].logical_id)
10525 for (o, n) in zip(instance.disks, new_disks)]
10526 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10527 result.Raise("Failed to rename original LVs")
10529 feedback_fn("Initializing DRBD devices...")
10530 # all child devices are in place, we can now create the DRBD devices
10531 for disk in new_disks:
10532 for node in [pnode, snode]:
10533 f_create = node == pnode
10534 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10536 # at this point, the instance has been modified
10537 instance.disk_template = constants.DT_DRBD8
10538 instance.disks = new_disks
10539 self.cfg.Update(instance, feedback_fn)
10541 # disks are created, waiting for sync
10542 disk_abort = not _WaitForSync(self, instance,
10543 oneshot=not self.op.wait_for_sync)
10545 raise errors.OpExecError("There are some degraded disks for"
10546 " this instance, please cleanup manually")
10548 def _ConvertDrbdToPlain(self, feedback_fn):
10549 """Converts an instance from drbd to plain.
10552 instance = self.instance
10553 assert len(instance.secondary_nodes) == 1
10554 pnode = instance.primary_node
10555 snode = instance.secondary_nodes[0]
10556 feedback_fn("Converting template to plain")
10558 old_disks = instance.disks
10559 new_disks = [d.children[0] for d in old_disks]
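# Note: for DRBD8 disks children[0] is the data LV and children[1] the
# metadata LV (see _CreateNewStorage above), so keeping only children[0]
# leaves exactly the plain LVs that will back the converted instance.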
10561 # copy over size and mode
10562 for parent, child in zip(old_disks, new_disks):
10563 child.size = parent.size
10564 child.mode = parent.mode
10566 # update instance structure
10567 instance.disks = new_disks
10568 instance.disk_template = constants.DT_PLAIN
10569 self.cfg.Update(instance, feedback_fn)
10571 feedback_fn("Removing volumes on the secondary node...")
10572 for disk in old_disks:
10573 self.cfg.SetDiskID(disk, snode)
10574 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10576 self.LogWarning("Could not remove block device %s on node %s,"
10577 " continuing anyway: %s", disk.iv_name, snode, msg)
10579 feedback_fn("Removing unneeded volumes on the primary node...")
10580 for idx, disk in enumerate(old_disks):
10581 meta = disk.children[1]
10582 self.cfg.SetDiskID(meta, pnode)
10583 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10585 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10586 " continuing anyway: %s", idx, pnode, msg)
10588 def Exec(self, feedback_fn):
10589 """Modifies an instance.
10591 All parameters take effect only at the next restart of the instance.
10594 # Process here the warnings from CheckPrereq, as we don't have a
10595 # feedback_fn there.
10596 for warn in self.warn:
10597 feedback_fn("WARNING: %s" % warn)
10600 instance = self.instance
10602 for disk_op, disk_dict in self.op.disks:
10603 if disk_op == constants.DDM_REMOVE:
10604 # remove the last disk
10605 device = instance.disks.pop()
10606 device_idx = len(instance.disks)
10607 for node, disk in device.ComputeNodeTree(instance.primary_node):
10608 self.cfg.SetDiskID(disk, node)
10609 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10611 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10612 " continuing anyway", device_idx, node, msg)
10613 result.append(("disk/%d" % device_idx, "remove"))
10614 elif disk_op == constants.DDM_ADD:
10616 if instance.disk_template in (constants.DT_FILE,
10617 constants.DT_SHARED_FILE):
10618 file_driver, file_path = instance.disks[0].logical_id
10619 file_path = os.path.dirname(file_path)
10621 file_driver = file_path = None
10622 disk_idx_base = len(instance.disks)
10623 new_disk = _GenerateDiskTemplate(self,
10624 instance.disk_template,
10625 instance.name, instance.primary_node,
10626 instance.secondary_nodes,
10630 disk_idx_base, feedback_fn)[0]
10631 instance.disks.append(new_disk)
10632 info = _GetInstanceInfoText(instance)
10634 logging.info("Creating volume %s for instance %s",
10635 new_disk.iv_name, instance.name)
10636 # Note: this needs to be kept in sync with _CreateDisks
10638 for node in instance.all_nodes:
10639 f_create = node == instance.primary_node
10641 _CreateBlockDev(self, node, instance, new_disk,
10642 f_create, info, f_create)
10643 except errors.OpExecError, err:
10644 self.LogWarning("Failed to create volume %s (%s) on"
10646 new_disk.iv_name, new_disk, node, err)
10647 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10648 (new_disk.size, new_disk.mode)))
10650 # change a given disk
10651 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10652 result.append(("disk.mode/%d" % disk_op,
10653 disk_dict[constants.IDISK_MODE]))
10655 if self.op.disk_template:
10656 r_shut = _ShutdownInstanceDisks(self, instance)
10658 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10659 " proceed with disk template conversion")
10660 mode = (instance.disk_template, self.op.disk_template)
10662 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10664 self.cfg.ReleaseDRBDMinors(instance.name)
10666 result.append(("disk_template", self.op.disk_template))
10669 for nic_op, nic_dict in self.op.nics:
10670 if nic_op == constants.DDM_REMOVE:
10671 # remove the last nic
10672 del instance.nics[-1]
10673 result.append(("nic.%d" % len(instance.nics), "remove"))
10674 elif nic_op == constants.DDM_ADD:
10675 # mac and bridge should be set by now
10676 mac = nic_dict[constants.INIC_MAC]
10677 ip = nic_dict.get(constants.INIC_IP, None)
10678 nicparams = self.nic_pinst[constants.DDM_ADD]
10679 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10680 instance.nics.append(new_nic)
10681 result.append(("nic.%d" % (len(instance.nics) - 1),
10682 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10683 (new_nic.mac, new_nic.ip,
10684 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10685 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10688 for key in (constants.INIC_MAC, constants.INIC_IP):
10689 if key in nic_dict:
10690 setattr(instance.nics[nic_op], key, nic_dict[key])
10691 if nic_op in self.nic_pinst:
10692 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10693 for key, val in nic_dict.iteritems():
10694 result.append(("nic.%s/%d" % (key, nic_op), val))
10697 if self.op.hvparams:
10698 instance.hvparams = self.hv_inst
10699 for key, val in self.op.hvparams.iteritems():
10700 result.append(("hv/%s" % key, val))
10703 if self.op.beparams:
10704 instance.beparams = self.be_inst
10705 for key, val in self.op.beparams.iteritems():
10706 result.append(("be/%s" % key, val))
10709 if self.op.os_name:
10710 instance.os = self.op.os_name
10713 if self.op.osparams:
10714 instance.osparams = self.os_inst
10715 for key, val in self.op.osparams.iteritems():
10716 result.append(("os/%s" % key, val))
10718 self.cfg.Update(instance, feedback_fn)
10722 _DISK_CONVERSIONS = {
10723 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10724 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10728 class LUBackupQuery(NoHooksLU):
10729 """Query the exports list
10734 def ExpandNames(self):
10735 self.needed_locks = {}
10736 self.share_locks[locking.LEVEL_NODE] = 1
10737 if not self.op.nodes:
10738 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10740 self.needed_locks[locking.LEVEL_NODE] = \
10741 _GetWantedNodes(self, self.op.nodes)
10743 def Exec(self, feedback_fn):
10744 """Compute the list of all the exported system images.
10747 @return: a dictionary with the structure node->(export-list)
10748 where export-list is a list of the instances exported on
10752 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10753 rpcresult = self.rpc.call_export_list(self.nodes)
10755 for node in rpcresult:
10756 if rpcresult[node].fail_msg:
10757 result[node] = False
10759 result[node] = rpcresult[node].payload
10764 class LUBackupPrepare(NoHooksLU):
10765 """Prepares an instance for an export and returns useful information.
10770 def ExpandNames(self):
10771 self._ExpandAndLockInstance()
10773 def CheckPrereq(self):
10774 """Check prerequisites.
10777 instance_name = self.op.instance_name
10779 self.instance = self.cfg.GetInstanceInfo(instance_name)
10780 assert self.instance is not None, \
10781 "Cannot retrieve locked instance %s" % self.op.instance_name
10782 _CheckNodeOnline(self, self.instance.primary_node)
10784 self._cds = _GetClusterDomainSecret()
10786 def Exec(self, feedback_fn):
10787 """Prepares an instance for an export.
10790 instance = self.instance
10792 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10793 salt = utils.GenerateSecret(8)
10795 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10796 result = self.rpc.call_x509_cert_create(instance.primary_node,
10797 constants.RIE_CERT_VALIDITY)
10798 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10800 (name, cert_pem) = result.payload
10802 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10806 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10807 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10809 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10815 class LUBackupExport(LogicalUnit):
10816 """Export an instance to an image in the cluster.
10819 HPATH = "instance-export"
10820 HTYPE = constants.HTYPE_INSTANCE
10823 def CheckArguments(self):
10824 """Check the arguments.
10827 self.x509_key_name = self.op.x509_key_name
10828 self.dest_x509_ca_pem = self.op.destination_x509_ca
10830 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10831 if not self.x509_key_name:
10832 raise errors.OpPrereqError("Missing X509 key name for encryption",
10833 errors.ECODE_INVAL)
10835 if not self.dest_x509_ca_pem:
10836 raise errors.OpPrereqError("Missing destination X509 CA",
10837 errors.ECODE_INVAL)
10839 def ExpandNames(self):
10840 self._ExpandAndLockInstance()
10842 # Lock all nodes for local exports
10843 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10844 # FIXME: lock only instance primary and destination node
10846 # Sad but true, for now we have to lock all nodes, as we don't know where
10847 # the previous export might be, and in this LU we search for it and
10848 # remove it from its current node. In the future we could fix this by:
10849 # - making a tasklet to search (share-lock all), then create the
10850 # new one, then one to remove, after
10851 # - removing the removal operation altogether
10852 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10854 def DeclareLocks(self, level):
10855 """Last minute lock declaration."""
10856 # All nodes are locked anyway, so nothing to do here.
10858 def BuildHooksEnv(self):
10859 """Build hooks env.
10861 This will run on the master, primary node and target node.
10865 "EXPORT_MODE": self.op.mode,
10866 "EXPORT_NODE": self.op.target_node,
10867 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10868 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10869 # TODO: Generic function for boolean env variables
10870 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10873 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10877 def BuildHooksNodes(self):
10878 """Build hooks nodes.
10881 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10883 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10884 nl.append(self.op.target_node)
10888 def CheckPrereq(self):
10889 """Check prerequisites.
10891 This checks that the instance and node names are valid.
10894 instance_name = self.op.instance_name
10896 self.instance = self.cfg.GetInstanceInfo(instance_name)
10897 assert self.instance is not None, \
10898 "Cannot retrieve locked instance %s" % self.op.instance_name
10899 _CheckNodeOnline(self, self.instance.primary_node)
10901 if (self.op.remove_instance and self.instance.admin_up and
10902 not self.op.shutdown):
10903 raise errors.OpPrereqError("Can not remove instance without shutting it"
10906 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10907 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10908 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10909 assert self.dst_node is not None
10911 _CheckNodeOnline(self, self.dst_node.name)
10912 _CheckNodeNotDrained(self, self.dst_node.name)
10915 self.dest_disk_info = None
10916 self.dest_x509_ca = None
10918 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10919 self.dst_node = None
10921 if len(self.op.target_node) != len(self.instance.disks):
10922 raise errors.OpPrereqError(("Received destination information for %s"
10923 " disks, but instance %s has %s disks") %
10924 (len(self.op.target_node), instance_name,
10925 len(self.instance.disks)),
10926 errors.ECODE_INVAL)
10928 cds = _GetClusterDomainSecret()
10930 # Check X509 key name
10932 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10933 except (TypeError, ValueError), err:
10934 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10936 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10937 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10938 errors.ECODE_INVAL)
10940 # Load and verify CA
10942 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10943 except OpenSSL.crypto.Error, err:
10944 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10945 (err, ), errors.ECODE_INVAL)
10947 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10948 if errcode is not None:
10949 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10950 (msg, ), errors.ECODE_INVAL)
10952 self.dest_x509_ca = cert
10954 # Verify target information
10956 for idx, disk_data in enumerate(self.op.target_node):
10958 (host, port, magic) = \
10959 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10960 except errors.GenericError, err:
10961 raise errors.OpPrereqError("Target info for disk %s: %s" %
10962 (idx, err), errors.ECODE_INVAL)
10964 disk_info.append((host, port, magic))
10966 assert len(disk_info) == len(self.op.target_node)
10967 self.dest_disk_info = disk_info
10970 raise errors.ProgrammerError("Unhandled export mode %r" %
10973 # instance disk type verification
10974 # TODO: Implement export support for file-based disks
10975 for disk in self.instance.disks:
10976 if disk.dev_type == constants.LD_FILE:
10977 raise errors.OpPrereqError("Export not supported for instances with"
10978 " file-based disks", errors.ECODE_INVAL)
10980 def _CleanupExports(self, feedback_fn):
10981 """Removes exports of current instance from all other nodes.
10983 If an instance in a cluster with nodes A..D was exported to node C, its
10984 exports will be removed from the nodes A, B and D.
10987 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10989 nodelist = self.cfg.GetNodeList()
10990 nodelist.remove(self.dst_node.name)
10992 # on one-node clusters nodelist will be empty after the removal
10993 # if we proceed, the backup would be removed because OpBackupQuery
10994 # substitutes an empty list with the full cluster node list.
10995 iname = self.instance.name
10997 feedback_fn("Removing old exports for instance %s" % iname)
10998 exportlist = self.rpc.call_export_list(nodelist)
10999 for node in exportlist:
11000 if exportlist[node].fail_msg:
11002 if iname in exportlist[node].payload:
11003 msg = self.rpc.call_export_remove(node, iname).fail_msg
11005 self.LogWarning("Could not remove older export for instance %s"
11006 " on node %s: %s", iname, node, msg)
11008 def Exec(self, feedback_fn):
11009 """Export an instance to an image in the cluster.
11012 assert self.op.mode in constants.EXPORT_MODES
11014 instance = self.instance
11015 src_node = instance.primary_node
11017 if self.op.shutdown:
11018 # shutdown the instance, but not the disks
11019 feedback_fn("Shutting down instance %s" % instance.name)
11020 result = self.rpc.call_instance_shutdown(src_node, instance,
11021 self.op.shutdown_timeout)
11022 # TODO: Maybe ignore failures if ignore_remove_failures is set
11023 result.Raise("Could not shutdown instance %s on"
11024 " node %s" % (instance.name, src_node))
11026 # set the disks ID correctly since call_instance_start needs the
11027 # correct drbd minor to create the symlinks
11028 for disk in instance.disks:
11029 self.cfg.SetDiskID(disk, src_node)
11031 activate_disks = (not instance.admin_up)
11034 # Activate the instance disks if we're exporting a stopped instance
11035 feedback_fn("Activating disks for %s" % instance.name)
11036 _StartInstanceDisks(self, instance, None)
11039 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11042 helper.CreateSnapshots()
11044 if (self.op.shutdown and instance.admin_up and
11045 not self.op.remove_instance):
11046 assert not activate_disks
11047 feedback_fn("Starting instance %s" % instance.name)
11048 result = self.rpc.call_instance_start(src_node, instance, None, None)
11049 msg = result.fail_msg
11050 if msg:
11051 feedback_fn("Failed to start instance: %s" % msg)
11052 _ShutdownInstanceDisks(self, instance)
11053 raise errors.OpExecError("Could not start instance: %s" % msg)
11055 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11056 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11057 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11058 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11059 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11061 (key_name, _, _) = self.x509_key_name
11063 dest_ca_pem = \
11064 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11065 self.dest_x509_ca)
11067 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11068 key_name, dest_ca_pem,
11069 timeouts)
11070 finally:
11071 helper.Cleanup()
11073 # Check for backwards compatibility
11074 assert len(dresults) == len(instance.disks)
11075 assert compat.all(isinstance(i, bool) for i in dresults), \
11076 "Not all results are boolean: %r" % dresults
11080 feedback_fn("Deactivating disks for %s" % instance.name)
11081 _ShutdownInstanceDisks(self, instance)
11083 if not (compat.all(dresults) and fin_resu):
11084 failures = []
11085 if not fin_resu:
11086 failures.append("export finalization")
11087 if not compat.all(dresults):
11088 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11089 if not dsk)
11090 failures.append("disk export: disk(s) %s" % fdsk)
11092 raise errors.OpExecError("Export failed, errors in %s" %
11093 utils.CommaJoin(failures))
11095 # At this point, the export was successful, we can cleanup/finish
11097 # Remove instance if requested
11098 if self.op.remove_instance:
11099 feedback_fn("Removing instance %s" % instance.name)
11100 _RemoveInstance(self, feedback_fn, instance,
11101 self.op.ignore_remove_failures)
11103 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11104 self._CleanupExports(feedback_fn)
11106 return fin_resu, dresults
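# Hedged usage sketch (hypothetical caller): Exec() returns the finalization
# status plus one boolean per exported disk, which could be summarized as:
#
#   fin_resu, dresults = lu.Exec(feedback_fn)
#   failed = [idx for (idx, ok) in enumerate(dresults) if not ok]
#   if failed or not fin_resu:
#     print("Export incomplete, failed disk indices: %s" % failed)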
11109 class LUBackupRemove(NoHooksLU):
11110 """Remove exports related to the named instance.
11115 def ExpandNames(self):
11116 self.needed_locks = {}
11117 # We need all nodes to be locked in order for RemoveExport to work, but we
11118 # don't need to lock the instance itself, as nothing will happen to it (and
11119 # we can remove exports also for a removed instance)
11120 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11122 def Exec(self, feedback_fn):
11123 """Remove any export.
11126 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11127 # If the instance was not found we'll try with the name that was passed in.
11128 # This will only work if it was an FQDN, though.
11129 fqdn_warn = False
11130 if not instance_name:
11131 fqdn_warn = True
11132 instance_name = self.op.instance_name
11134 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11135 exportlist = self.rpc.call_export_list(locked_nodes)
11136 found = False
11137 for node in exportlist:
11138 msg = exportlist[node].fail_msg
11139 if msg:
11140 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11141 continue
11142 if instance_name in exportlist[node].payload:
11143 found = True
11144 result = self.rpc.call_export_remove(node, instance_name)
11145 msg = result.fail_msg
11146 if msg:
11147 logging.error("Could not remove export for instance %s"
11148 " on node %s: %s", instance_name, node, msg)
11150 if fqdn_warn and not found:
11151 feedback_fn("Export not found. If trying to remove an export belonging"
11152 " to a deleted instance please use its Fully Qualified"
11156 class LUGroupAdd(LogicalUnit):
11157 """Logical unit for creating node groups.
11160 HPATH = "group-add"
11161 HTYPE = constants.HTYPE_GROUP
11164 def ExpandNames(self):
11165 # We need the new group's UUID here so that we can create and acquire the
11166 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11167 # that it should not check whether the UUID exists in the configuration.
11168 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11169 self.needed_locks = {}
11170 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11172 def CheckPrereq(self):
11173 """Check prerequisites.
11175 This checks that the given group name is not an existing node group
11179 try:
11180 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11181 except errors.OpPrereqError:
11182 pass
11183 else:
11184 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11185 " node group (UUID: %s)" %
11186 (self.op.group_name, existing_uuid),
11187 errors.ECODE_EXISTS)
11189 if self.op.ndparams:
11190 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11192 def BuildHooksEnv(self):
11193 """Build hooks env.
11197 "GROUP_NAME": self.op.group_name,
11200 def BuildHooksNodes(self):
11201 """Build hooks nodes.
11204 mn = self.cfg.GetMasterNode()
11205 return ([mn], [mn])
11207 def Exec(self, feedback_fn):
11208 """Add the node group to the cluster.
11211 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11212 uuid=self.group_uuid,
11213 alloc_policy=self.op.alloc_policy,
11214 ndparams=self.op.ndparams)
11216 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11217 del self.remove_locks[locking.LEVEL_NODEGROUP]
11220 class LUGroupAssignNodes(NoHooksLU):
11221 """Logical unit for assigning nodes to groups.
11226 def ExpandNames(self):
11227 # These raise errors.OpPrereqError on their own:
11228 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11229 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11231 # We want to lock all the affected nodes and groups. We have readily
11232 # available the list of nodes, and the *destination* group. To gather the
11233 # list of "source" groups, we need to fetch node information later on.
11234 self.needed_locks = {
11235 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11236 locking.LEVEL_NODE: self.op.nodes,
11237 }
11239 def DeclareLocks(self, level):
11240 if level == locking.LEVEL_NODEGROUP:
11241 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11243 # Try to get all affected nodes' groups without having the group or node
11244 # lock yet. Needs verification later in the code flow.
11245 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11247 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11249 def CheckPrereq(self):
11250 """Check prerequisites.
11253 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11254 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11255 frozenset(self.op.nodes))
11257 expected_locks = (set([self.group_uuid]) |
11258 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11259 actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11260 if actual_locks != expected_locks:
11261 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11262 " current groups are '%s', used to be '%s'" %
11263 (utils.CommaJoin(expected_locks),
11264 utils.CommaJoin(actual_locks)))
11266 self.node_data = self.cfg.GetAllNodesInfo()
11267 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11268 instance_data = self.cfg.GetAllInstancesInfo()
11270 if self.group is None:
11271 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11272 (self.op.group_name, self.group_uuid))
11274 (new_splits, previous_splits) = \
11275 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11276 for node in self.op.nodes],
11277 self.node_data, instance_data)
11279 if new_splits:
11280 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11282 if not self.op.force:
11283 raise errors.OpExecError("The following instances get split by this"
11284 " change and --force was not given: %s" %
11285 fmt_new_splits)
11286 else:
11287 self.LogWarning("This operation will split the following instances: %s",
11288 fmt_new_splits)
11290 if previous_splits:
11291 self.LogWarning("In addition, these already-split instances continue"
11292 " to be split across groups: %s",
11293 utils.CommaJoin(utils.NiceSort(previous_splits)))
11295 def Exec(self, feedback_fn):
11296 """Assign nodes to a new group.
11299 for node in self.op.nodes:
11300 self.node_data[node].group = self.group_uuid
11302 # FIXME: Depends on side-effects of modifying the result of
11303 # C{cfg.GetAllNodesInfo}
11305 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11307 @staticmethod
11308 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11309 """Check for split instances after a node assignment.
11311 This method considers a series of node assignments as an atomic operation,
11312 and returns information about split instances after applying the set of
11315 In particular, it returns information about newly split instances, and
11316 instances that were already split, and remain so after the change.
11318 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11321 @type changes: list of (node_name, new_group_uuid) pairs.
11322 @param changes: list of node assignments to consider.
11323 @param node_data: a dict with data for all nodes
11324 @param instance_data: a dict with all instances to consider
11325 @rtype: a two-tuple
11326 @return: a list of instances that were previously okay and result split as a
11327 consequence of this change, and a list of instances that were previously
11328 split and this change does not fix.
11331 changed_nodes = dict((node, group) for node, group in changes
11332 if node_data[node].group != group)
11334 all_split_instances = set()
11335 previously_split_instances = set()
11337 def InstanceNodes(instance):
11338 return [instance.primary_node] + list(instance.secondary_nodes)
11340 for inst in instance_data.values():
11341 if inst.disk_template not in constants.DTS_INT_MIRROR:
11342 continue
11344 instance_nodes = InstanceNodes(inst)
11346 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11347 previously_split_instances.add(inst.name)
11349 if len(set(changed_nodes.get(node, node_data[node].group)
11350 for node in instance_nodes)) > 1:
11351 all_split_instances.add(inst.name)
11353 return (list(all_split_instances - previously_split_instances),
11354 list(previously_split_instances & all_split_instances))
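# Illustrative sketch of the split check above, using hypothetical stand-ins
# for the configuration objects. An internally mirrored instance becomes
# "split" when its nodes end up in more than one group after the changes.
#
#   import collections
#   Node = collections.namedtuple("Node", "group")
#   node_data = {"node1": Node("g1"), "node2": Node("g1")}
#   changes = [("node2", "g2")]   # would split a DRBD instance on node1/node2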
11357 class _GroupQuery(_QueryBase):
11358 FIELDS = query.GROUP_FIELDS
11360 def ExpandNames(self, lu):
11361 lu.needed_locks = {}
11363 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11364 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11366 if not self.names:
11367 self.wanted = [name_to_uuid[name]
11368 for name in utils.NiceSort(name_to_uuid.keys())]
11369 else:
11370 # Accept names to be either names or UUIDs.
11371 missing = []
11372 self.wanted = []
11373 all_uuid = frozenset(self._all_groups.keys())
11375 for name in self.names:
11376 if name in all_uuid:
11377 self.wanted.append(name)
11378 elif name in name_to_uuid:
11379 self.wanted.append(name_to_uuid[name])
11380 else:
11381 missing.append(name)
11383 if missing:
11384 raise errors.OpPrereqError("Some groups do not exist: %s" %
11385 utils.CommaJoin(missing),
11386 errors.ECODE_NOENT)
11388 def DeclareLocks(self, lu, level):
11389 pass
11391 def _GetQueryData(self, lu):
11392 """Computes the list of node groups and their attributes.
11395 do_nodes = query.GQ_NODE in self.requested_data
11396 do_instances = query.GQ_INST in self.requested_data
11398 group_to_nodes = None
11399 group_to_instances = None
11401 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11402 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11403 # latter GetAllInstancesInfo() is not enough, for we have to go through
11404 # instance->node. Hence, we will need to process nodes even if we only need
11405 # instance information.
11406 if do_nodes or do_instances:
11407 all_nodes = lu.cfg.GetAllNodesInfo()
11408 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11409 node_to_group = {}
11411 for node in all_nodes.values():
11412 if node.group in group_to_nodes:
11413 group_to_nodes[node.group].append(node.name)
11414 node_to_group[node.name] = node.group
11416 if do_instances:
11417 all_instances = lu.cfg.GetAllInstancesInfo()
11418 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11420 for instance in all_instances.values():
11421 node = instance.primary_node
11422 if node in node_to_group:
11423 group_to_instances[node_to_group[node]].append(instance.name)
11425 if not do_nodes:
11426 # Do not pass on node information if it was not requested.
11427 group_to_nodes = None
11429 return query.GroupQueryData([self._all_groups[uuid]
11430 for uuid in self.wanted],
11431 group_to_nodes, group_to_instances)
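# Small sketch of the group->nodes and node->group maps built above, with
# hypothetical names:
#
#   group_to_nodes = {"uuid-a": [], "uuid-b": []}
#   node_to_group = {}
#   for name, group in [("node1", "uuid-a"), ("node2", "uuid-b")]:
#     group_to_nodes[group].append(name)
#     node_to_group[name] = group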
11434 class LUGroupQuery(NoHooksLU):
11435 """Logical unit for querying node groups.
11440 def CheckArguments(self):
11441 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11442 self.op.output_fields, False)
11444 def ExpandNames(self):
11445 self.gq.ExpandNames(self)
11447 def Exec(self, feedback_fn):
11448 return self.gq.OldStyleQuery(self)
11451 class LUGroupSetParams(LogicalUnit):
11452 """Modifies the parameters of a node group.
11455 HPATH = "group-modify"
11456 HTYPE = constants.HTYPE_GROUP
11459 def CheckArguments(self):
11460 all_changes = [
11461 self.op.ndparams,
11462 self.op.alloc_policy,
11463 ]
11465 if all_changes.count(None) == len(all_changes):
11466 raise errors.OpPrereqError("Please pass at least one modification",
11467 errors.ECODE_INVAL)
11469 def ExpandNames(self):
11470 # This raises errors.OpPrereqError on its own:
11471 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11473 self.needed_locks = {
11474 locking.LEVEL_NODEGROUP: [self.group_uuid],
11475 }
11477 def CheckPrereq(self):
11478 """Check prerequisites.
11481 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11483 if self.group is None:
11484 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11485 (self.op.group_name, self.group_uuid))
11487 if self.op.ndparams:
11488 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11489 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11490 self.new_ndparams = new_ndparams
11492 def BuildHooksEnv(self):
11493 """Build hooks env.
11497 "GROUP_NAME": self.op.group_name,
11498 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11501 def BuildHooksNodes(self):
11502 """Build hooks nodes.
11505 mn = self.cfg.GetMasterNode()
11506 return ([mn], [mn])
11508 def Exec(self, feedback_fn):
11509 """Modifies the node group.
11512 result = []
11514 if self.op.ndparams:
11515 self.group.ndparams = self.new_ndparams
11516 result.append(("ndparams", str(self.group.ndparams)))
11518 if self.op.alloc_policy:
11519 self.group.alloc_policy = self.op.alloc_policy
11521 self.cfg.Update(self.group, feedback_fn)
11523 return result
11526 class LUGroupRemove(LogicalUnit):
11527 HPATH = "group-remove"
11528 HTYPE = constants.HTYPE_GROUP
11531 def ExpandNames(self):
11532 # This raises errors.OpPrereqError on its own:
11533 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11534 self.needed_locks = {
11535 locking.LEVEL_NODEGROUP: [self.group_uuid],
11536 }
11538 def CheckPrereq(self):
11539 """Check prerequisites.
11541 This checks that the given group name exists as a node group, that is
11542 empty (i.e., contains no nodes), and that is not the last group of the
11546 # Verify that the group is empty.
11547 group_nodes = [node.name
11548 for node in self.cfg.GetAllNodesInfo().values()
11549 if node.group == self.group_uuid]
11552 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11554 (self.op.group_name,
11555 utils.CommaJoin(utils.NiceSort(group_nodes))),
11556 errors.ECODE_STATE)
11558 # Verify the cluster would not be left group-less.
11559 if len(self.cfg.GetNodeGroupList()) == 1:
11560 raise errors.OpPrereqError("Group '%s' is the only group,"
11561 " cannot be removed" %
11562 self.op.group_name,
11563 errors.ECODE_STATE)
11565 def BuildHooksEnv(self):
11566 """Build hooks env.
11570 "GROUP_NAME": self.op.group_name,
11573 def BuildHooksNodes(self):
11574 """Build hooks nodes.
11577 mn = self.cfg.GetMasterNode()
11578 return ([mn], [mn])
11580 def Exec(self, feedback_fn):
11581 """Remove the node group.
11584 try:
11585 self.cfg.RemoveNodeGroup(self.group_uuid)
11586 except errors.ConfigurationError:
11587 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11588 (self.op.group_name, self.group_uuid))
11590 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11593 class LUGroupRename(LogicalUnit):
11594 HPATH = "group-rename"
11595 HTYPE = constants.HTYPE_GROUP
11598 def ExpandNames(self):
11599 # This raises errors.OpPrereqError on its own:
11600 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11602 self.needed_locks = {
11603 locking.LEVEL_NODEGROUP: [self.group_uuid],
11604 }
11606 def CheckPrereq(self):
11607 """Check prerequisites.
11609 Ensures requested new name is not yet used.
11612 try:
11613 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11614 except errors.OpPrereqError:
11615 pass
11616 else:
11617 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11618 " node group (UUID: %s)" %
11619 (self.op.new_name, new_name_uuid),
11620 errors.ECODE_EXISTS)
11622 def BuildHooksEnv(self):
11623 """Build hooks env.
11627 "OLD_NAME": self.op.group_name,
11628 "NEW_NAME": self.op.new_name,
11631 def BuildHooksNodes(self):
11632 """Build hooks nodes.
11635 mn = self.cfg.GetMasterNode()
11637 all_nodes = self.cfg.GetAllNodesInfo()
11638 all_nodes.pop(mn, None)
11640 run_nodes = [mn]
11641 run_nodes.extend(node.name for node in all_nodes.values()
11642 if node.group == self.group_uuid)
11644 return (run_nodes, run_nodes)
11646 def Exec(self, feedback_fn):
11647 """Rename the node group.
11650 group = self.cfg.GetNodeGroup(self.group_uuid)
11653 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11654 (self.op.group_name, self.group_uuid))
11656 group.name = self.op.new_name
11657 self.cfg.Update(group, feedback_fn)
11659 return self.op.new_name
11662 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11663 """Generic tags LU.
11665 This is an abstract class which is the parent of all the other tags LUs.
11668 def ExpandNames(self):
11669 self.group_uuid = None
11670 self.needed_locks = {}
11671 if self.op.kind == constants.TAG_NODE:
11672 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11673 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11674 elif self.op.kind == constants.TAG_INSTANCE:
11675 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11676 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11677 elif self.op.kind == constants.TAG_NODEGROUP:
11678 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11680 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11681 # not possible to acquire the BGL based on opcode parameters)
11683 def CheckPrereq(self):
11684 """Check prerequisites.
11687 if self.op.kind == constants.TAG_CLUSTER:
11688 self.target = self.cfg.GetClusterInfo()
11689 elif self.op.kind == constants.TAG_NODE:
11690 self.target = self.cfg.GetNodeInfo(self.op.name)
11691 elif self.op.kind == constants.TAG_INSTANCE:
11692 self.target = self.cfg.GetInstanceInfo(self.op.name)
11693 elif self.op.kind == constants.TAG_NODEGROUP:
11694 self.target = self.cfg.GetNodeGroup(self.group_uuid)
11696 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11697 str(self.op.kind), errors.ECODE_INVAL)
11700 class LUTagsGet(TagsLU):
11701 """Returns the tags of a given object.
11706 def ExpandNames(self):
11707 TagsLU.ExpandNames(self)
11709 # Share locks as this is only a read operation
11710 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11712 def Exec(self, feedback_fn):
11713 """Returns the tag list.
11716 return list(self.target.GetTags())
11719 class LUTagsSearch(NoHooksLU):
11720 """Searches the tags for a given pattern.
11725 def ExpandNames(self):
11726 self.needed_locks = {}
11728 def CheckPrereq(self):
11729 """Check prerequisites.
11731 This checks the pattern passed for validity by compiling it.
11734 try:
11735 self.re = re.compile(self.op.pattern)
11736 except re.error, err:
11737 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11738 (self.op.pattern, err), errors.ECODE_INVAL)
11740 def Exec(self, feedback_fn):
11741 """Returns the tag list.
11744 cfg = self.cfg
11745 tgts = [("/cluster", cfg.GetClusterInfo())]
11746 ilist = cfg.GetAllInstancesInfo().values()
11747 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11748 nlist = cfg.GetAllNodesInfo().values()
11749 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11750 tgts.extend(("/nodegroup/%s" % n.name, n)
11751 for n in cfg.GetAllNodeGroupsInfo().values())
11752 results = []
11753 for path, target in tgts:
11754 for tag in target.GetTags():
11755 if self.re.search(tag):
11756 results.append((path, tag))
11757 return results
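# Minimal sketch of the pattern matching performed above (tag values are
# illustrative only):
#
#   import re
#   pattern = re.compile("^env:")
#   tags = ["env:prod", "owner:ops"]
#   matches = [t for t in tags if pattern.search(t)]   # ["env:prod"]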
11760 class LUTagsSet(TagsLU):
11761 """Sets a tag on a given object.
11766 def CheckPrereq(self):
11767 """Check prerequisites.
11769 This checks the type and length of the tag name and value.
11772 TagsLU.CheckPrereq(self)
11773 for tag in self.op.tags:
11774 objects.TaggableObject.ValidateTag(tag)
11776 def Exec(self, feedback_fn):
11780 try:
11781 for tag in self.op.tags:
11782 self.target.AddTag(tag)
11783 except errors.TagError, err:
11784 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11785 self.cfg.Update(self.target, feedback_fn)
11788 class LUTagsDel(TagsLU):
11789 """Delete a list of tags from a given object.
11794 def CheckPrereq(self):
11795 """Check prerequisites.
11797 This checks that we have the given tag.
11800 TagsLU.CheckPrereq(self)
11801 for tag in self.op.tags:
11802 objects.TaggableObject.ValidateTag(tag)
11803 del_tags = frozenset(self.op.tags)
11804 cur_tags = self.target.GetTags()
11806 diff_tags = del_tags - cur_tags
11807 if diff_tags:
11808 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11809 raise errors.OpPrereqError("Tag(s) %s not found" %
11810 (utils.CommaJoin(diff_names), ),
11811 errors.ECODE_NOENT)
11813 def Exec(self, feedback_fn):
11814 """Remove the tag from the object.
11817 for tag in self.op.tags:
11818 self.target.RemoveTag(tag)
11819 self.cfg.Update(self.target, feedback_fn)
11822 class LUTestDelay(NoHooksLU):
11823 """Sleep for a specified amount of time.
11825 This LU sleeps on the master and/or nodes for a specified amount of
11826 time.
11831 def ExpandNames(self):
11832 """Expand names and set required locks.
11834 This expands the node list, if any.
11837 self.needed_locks = {}
11838 if self.op.on_nodes:
11839 # _GetWantedNodes can be used here, but is not always appropriate to use
11840 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11841 # more information.
11842 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11843 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11845 def _TestDelay(self):
11846 """Do the actual sleep.
11849 if self.op.on_master:
11850 if not utils.TestDelay(self.op.duration):
11851 raise errors.OpExecError("Error during master delay test")
11852 if self.op.on_nodes:
11853 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11854 for node, node_result in result.items():
11855 node_result.Raise("Failure during rpc call to node %s" % node)
11857 def Exec(self, feedback_fn):
11858 """Execute the test delay opcode, with the wanted repetitions.
11861 if self.op.repeat == 0:
11862 self._TestDelay()
11863 else:
11864 top_value = self.op.repeat - 1
11865 for i in range(self.op.repeat):
11866 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11867 self._TestDelay()
11870 class LUTestJqueue(NoHooksLU):
11871 """Utility LU to test some aspects of the job queue.
11876 # Must be lower than default timeout for WaitForJobChange to see whether it
11877 # notices changed jobs
11878 _CLIENT_CONNECT_TIMEOUT = 20.0
11879 _CLIENT_CONFIRM_TIMEOUT = 60.0
11881 @classmethod
11882 def _NotifyUsingSocket(cls, cb, errcls):
11883 """Opens a Unix socket and waits for another program to connect.
11886 @param cb: Callback to send socket name to client
11887 @type errcls: class
11888 @param errcls: Exception class to use for errors
11891 # Using a temporary directory as there's no easy way to create temporary
11892 # sockets without writing a custom loop around tempfile.mktemp and
11894 tmpdir = tempfile.mkdtemp()
11895 try:
11896 tmpsock = utils.PathJoin(tmpdir, "sock")
11898 logging.debug("Creating temporary socket at %s", tmpsock)
11899 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11900 try:
11901 sock.bind(tmpsock)
11902 sock.listen(1)
11904 # Send details to client
11905 cb(tmpsock)
11907 # Wait for client to connect before continuing
11908 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11909 try:
11910 (conn, _) = sock.accept()
11911 except socket.error, err:
11912 raise errcls("Client didn't connect in time (%s)" % err)
11916 # Remove as soon as client is connected
11917 shutil.rmtree(tmpdir)
11919 # Wait for client to close
11920 try:
11921 try:
11922 # pylint: disable-msg=E1101
11923 # Instance of '_socketobject' has no ... member
11924 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11925 conn.recv(1)
11926 except socket.error, err:
11927 raise errcls("Client failed to confirm notification (%s)" % err)
11928 finally:
11929 conn.close()
11931 def _SendNotification(self, test, arg, sockname):
11932 """Sends a notification to the client.
11935 @param test: Test name
11936 @param arg: Test argument (depends on test)
11937 @type sockname: string
11938 @param sockname: Socket path
11941 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11943 def _Notify(self, prereq, test, arg):
11944 """Notifies the client of a test.
11947 @param prereq: Whether this is a prereq-phase test
11949 @param test: Test name
11950 @param arg: Test argument (depends on test)
11953 if prereq:
11954 errcls = errors.OpPrereqError
11955 else:
11956 errcls = errors.OpExecError
11958 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11959 test, arg),
11960 errcls)
11962 def CheckArguments(self):
11963 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11964 self.expandnames_calls = 0
11966 def ExpandNames(self):
11967 checkargs_calls = getattr(self, "checkargs_calls", 0)
11968 if checkargs_calls < 1:
11969 raise errors.ProgrammerError("CheckArguments was not called")
11971 self.expandnames_calls += 1
11973 if self.op.notify_waitlock:
11974 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11976 self.LogInfo("Expanding names")
11978 # Get lock on master node (just to get a lock, not for a particular reason)
11979 self.needed_locks = {
11980 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11981 }
11983 def Exec(self, feedback_fn):
11984 if self.expandnames_calls < 1:
11985 raise errors.ProgrammerError("ExpandNames was not called")
11987 if self.op.notify_exec:
11988 self._Notify(False, constants.JQT_EXEC, None)
11990 self.LogInfo("Executing")
11992 if self.op.log_messages:
11993 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11994 for idx, msg in enumerate(self.op.log_messages):
11995 self.LogInfo("Sending log message %s", idx + 1)
11996 feedback_fn(constants.JQT_MSGPREFIX + msg)
11997 # Report how many test messages have been sent
11998 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12001 raise errors.OpExecError("Opcode failure was requested")
12006 class IAllocator(object):
12007 """IAllocator framework.
12009 An IAllocator instance has four sets of attributes:
12010 - cfg that is needed to query the cluster
12011 - input data (all members of the _KEYS class attribute are required)
12012 - four buffer attributes (in|out_data|text), that represent the
12013 input (to the external script) in text and data structure format,
12014 and the output from it, again in two formats
12015 - the result variables from the script (success, info, nodes) for
12016 easy usage
12019 # pylint: disable-msg=R0902
12020 # lots of instance attributes
12022 def __init__(self, cfg, rpc, mode, **kwargs):
12023 self.cfg = cfg
12024 self.rpc = rpc
12025 # init buffer variables
12026 self.in_text = self.out_text = self.in_data = self.out_data = None
12027 # init all input fields so that pylint is happy
12028 self.mode = mode
12029 self.memory = self.disks = self.disk_template = None
12030 self.os = self.tags = self.nics = self.vcpus = None
12031 self.hypervisor = None
12032 self.relocate_from = None
12033 self.name = None
12034 self.evac_nodes = None
12035 self.instances = None
12036 self.evac_mode = None
12037 self.target_groups = []
12039 self.required_nodes = None
12040 # init result fields
12041 self.success = self.info = self.result = None
12043 try:
12044 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12045 except KeyError:
12046 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12047 " IAllocator" % self.mode)
12049 keyset = [n for (n, _) in keydata]
12051 for key in kwargs:
12052 if key not in keyset:
12053 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12054 " IAllocator" % key)
12055 setattr(self, key, kwargs[key])
12057 for key in keyset:
12058 if key not in kwargs:
12059 raise errors.ProgrammerError("Missing input parameter '%s' to"
12060 " IAllocator" % key)
12061 self._BuildInputData(compat.partial(fn, self), keydata)
12063 def _ComputeClusterData(self):
12064 """Compute the generic allocator input data.
12066 This is the data that is independent of the actual operation.
12069 cfg = self.cfg
12070 cluster_info = cfg.GetClusterInfo()
12072 data = {
12073 "version": constants.IALLOCATOR_VERSION,
12074 "cluster_name": cfg.GetClusterName(),
12075 "cluster_tags": list(cluster_info.GetTags()),
12076 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12077 # we don't have job IDs
12078 }
12079 ninfo = cfg.GetAllNodesInfo()
12080 iinfo = cfg.GetAllInstancesInfo().values()
12081 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12084 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12086 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12087 hypervisor_name = self.hypervisor
12088 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12089 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12091 hypervisor_name = cluster_info.enabled_hypervisors[0]
12093 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12094 hypervisor_name)
12095 node_iinfo = \
12096 self.rpc.call_all_instances_info(node_list,
12097 cluster_info.enabled_hypervisors)
12099 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12101 config_ndata = self._ComputeBasicNodeData(ninfo)
12102 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12103 i_list, config_ndata)
12104 assert len(data["nodes"]) == len(ninfo), \
12105 "Incomplete node data computed"
12107 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12109 self.in_data = data
12111 @staticmethod
12112 def _ComputeNodeGroupData(cfg):
12113 """Compute node groups data.
12116 ng = dict((guuid, {
12117 "name": gdata.name,
12118 "alloc_policy": gdata.alloc_policy,
12120 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12124 @staticmethod
12125 def _ComputeBasicNodeData(node_cfg):
12126 """Compute global node data.
12129 @returns: a dict of name: (node dict, node config)
12132 # fill in static (config-based) values
12133 node_results = dict((ninfo.name, {
12134 "tags": list(ninfo.GetTags()),
12135 "primary_ip": ninfo.primary_ip,
12136 "secondary_ip": ninfo.secondary_ip,
12137 "offline": ninfo.offline,
12138 "drained": ninfo.drained,
12139 "master_candidate": ninfo.master_candidate,
12140 "group": ninfo.group,
12141 "master_capable": ninfo.master_capable,
12142 "vm_capable": ninfo.vm_capable,
12144 for ninfo in node_cfg.values())
12146 return node_results
12148 @staticmethod
12149 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12150 node_results):
12151 """Compute global node data.
12153 @param node_results: the basic node structures as filled from the config
12156 # make a copy of the current dict
12157 node_results = dict(node_results)
12158 for nname, nresult in node_data.items():
12159 assert nname in node_results, "Missing basic data for node %s" % nname
12160 ninfo = node_cfg[nname]
12162 if not (ninfo.offline or ninfo.drained):
12163 nresult.Raise("Can't get data for node %s" % nname)
12164 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12165 nname)
12166 remote_info = nresult.payload
12168 for attr in ['memory_total', 'memory_free', 'memory_dom0',
12169 'vg_size', 'vg_free', 'cpu_total']:
12170 if attr not in remote_info:
12171 raise errors.OpExecError("Node '%s' didn't return attribute"
12172 " '%s'" % (nname, attr))
12173 if not isinstance(remote_info[attr], int):
12174 raise errors.OpExecError("Node '%s' returned invalid value"
12176 (nname, attr, remote_info[attr]))
12177 # compute memory used by primary instances
12178 i_p_mem = i_p_up_mem = 0
12179 for iinfo, beinfo in i_list:
12180 if iinfo.primary_node == nname:
12181 i_p_mem += beinfo[constants.BE_MEMORY]
12182 if iinfo.name not in node_iinfo[nname].payload:
12183 i_used_mem = 0
12184 else:
12185 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12186 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12187 remote_info['memory_free'] -= max(0, i_mem_diff)
12189 if iinfo.admin_up:
12190 i_p_up_mem += beinfo[constants.BE_MEMORY]
12192 # compute memory used by instances
12193 pnr_dyn = {
12194 "total_memory": remote_info['memory_total'],
12195 "reserved_memory": remote_info['memory_dom0'],
12196 "free_memory": remote_info['memory_free'],
12197 "total_disk": remote_info['vg_size'],
12198 "free_disk": remote_info['vg_free'],
12199 "total_cpus": remote_info['cpu_total'],
12200 "i_pri_memory": i_p_mem,
12201 "i_pri_up_memory": i_p_up_mem,
12203 pnr_dyn.update(node_results[nname])
12204 node_results[nname] = pnr_dyn
12206 return node_results
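# Worked example of the free-memory correction above (numbers are made up):
# when an instance's configured BE_MEMORY exceeds the memory the hypervisor
# reports as actually used, the difference is subtracted from the node's
# reported free memory.
#
#   be_memory, used_mem, memory_free = 1024, 512, 4096
#   memory_free -= max(0, be_memory - used_mem)   # 4096 - 512 = 3584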
12208 @staticmethod
12209 def _ComputeInstanceData(cluster_info, i_list):
12210 """Compute global instance data.
12213 instance_data = {}
12214 for iinfo, beinfo in i_list:
12215 nic_data = []
12216 for nic in iinfo.nics:
12217 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12221 "mode": filled_params[constants.NIC_MODE],
12222 "link": filled_params[constants.NIC_LINK],
12224 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12225 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12226 nic_data.append(nic_dict)
12228 "tags": list(iinfo.GetTags()),
12229 "admin_up": iinfo.admin_up,
12230 "vcpus": beinfo[constants.BE_VCPUS],
12231 "memory": beinfo[constants.BE_MEMORY],
12233 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12235 "disks": [{constants.IDISK_SIZE: dsk.size,
12236 constants.IDISK_MODE: dsk.mode}
12237 for dsk in iinfo.disks],
12238 "disk_template": iinfo.disk_template,
12239 "hypervisor": iinfo.hypervisor,
12241 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12243 instance_data[iinfo.name] = pir
12245 return instance_data
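# Sketch of the per-instance structure produced above; field values are
# illustrative only and not taken from a real cluster:
#
#   pir_example = {
#     "tags": [], "admin_up": True, "vcpus": 2, "memory": 1024,
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#   }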
12247 def _AddNewInstance(self):
12248 """Add new instance data to allocator structure.
12250 This in combination with _AllocatorGetClusterData will create the
12251 correct structure needed as input for the allocator.
12253 The checks for the completeness of the opcode must have already been
12257 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12259 if self.disk_template in constants.DTS_INT_MIRROR:
12260 self.required_nodes = 2
12261 else:
12262 self.required_nodes = 1
12266 "disk_template": self.disk_template,
12269 "vcpus": self.vcpus,
12270 "memory": self.memory,
12271 "disks": self.disks,
12272 "disk_space_total": disk_space,
12274 "required_nodes": self.required_nodes,
12275 "hypervisor": self.hypervisor,
12280 def _AddRelocateInstance(self):
12281 """Add relocate instance data to allocator structure.
12283 This in combination with _IAllocatorGetClusterData will create the
12284 correct structure needed as input for the allocator.
12286 The checks for the completeness of the opcode must have already been
12290 instance = self.cfg.GetInstanceInfo(self.name)
12291 if instance is None:
12292 raise errors.ProgrammerError("Unknown instance '%s' passed to"
12293 " IAllocator" % self.name)
12295 if instance.disk_template not in constants.DTS_MIRRORED:
12296 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12297 errors.ECODE_INVAL)
12299 if instance.disk_template in constants.DTS_INT_MIRROR and \
12300 len(instance.secondary_nodes) != 1:
12301 raise errors.OpPrereqError("Instance has not exactly one secondary node",
12302 errors.ECODE_STATE)
12304 self.required_nodes = 1
12305 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12306 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12310 "disk_space_total": disk_space,
12311 "required_nodes": self.required_nodes,
12312 "relocate_from": self.relocate_from,
12316 def _AddEvacuateNodes(self):
12317 """Add evacuate nodes data to allocator structure.
12321 "evac_nodes": self.evac_nodes
12325 def _AddNodeEvacuate(self):
12326 """Get data for node-evacuate requests.
12330 "instances": self.instances,
12331 "evac_mode": self.evac_mode,
12334 def _AddChangeGroup(self):
12335 """Get data for node-evacuate requests.
12339 "instances": self.instances,
12340 "target_groups": self.target_groups,
12343 def _BuildInputData(self, fn, keydata):
12344 """Build input data structures.
12347 self._ComputeClusterData()
12350 request["type"] = self.mode
12351 for keyname, keytype in keydata:
12352 if keyname not in request:
12353 raise errors.ProgrammerError("Request parameter %s is missing" %
12355 val = request[keyname]
12356 if not keytype(val):
12357 raise errors.ProgrammerError("Request parameter %s doesn't pass"
12358 " validation, value %s, expected"
12359 " type %s" % (keyname, val, keytype))
12360 self.in_data["request"] = request
12362 self.in_text = serializer.Dump(self.in_data)
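# Hedged sketch of the serialized request for the relocate mode, based on the
# keys assembled by _AddRelocateInstance above (values are illustrative):
#
#   request_example = {
#     "type": "relocate",
#     "name": "instance1.example.com",
#     "disk_space_total": 10240,
#     "required_nodes": 1,
#     "relocate_from": ["node2.example.com"],
#   }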
12364 _STRING_LIST = ht.TListOf(ht.TString)
12365 _JOBSET_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12366 # pylint: disable-msg=E1101
12367 # Class '...' has no 'OP_ID' member
12368 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12369 opcodes.OpInstanceMigrate.OP_ID,
12370 opcodes.OpInstanceReplaceDisks.OP_ID])
12373 constants.IALLOCATOR_MODE_ALLOC:
12376 ("name", ht.TString),
12377 ("memory", ht.TInt),
12378 ("disks", ht.TListOf(ht.TDict)),
12379 ("disk_template", ht.TString),
12380 ("os", ht.TString),
12381 ("tags", _STRING_LIST),
12382 ("nics", ht.TListOf(ht.TDict)),
12383 ("vcpus", ht.TInt),
12384 ("hypervisor", ht.TString),
12386 constants.IALLOCATOR_MODE_RELOC:
12387 (_AddRelocateInstance,
12388 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12390 constants.IALLOCATOR_MODE_MEVAC:
12391 (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12392 ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12393 constants.IALLOCATOR_MODE_NODE_EVAC:
12394 (_AddNodeEvacuate, [
12395 ("instances", _STRING_LIST),
12396 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12398 constants.IALLOCATOR_MODE_CHG_GROUP:
12399 (_AddChangeGroup, [
12400 ("instances", _STRING_LIST),
12401 ("target_groups", _STRING_LIST),
12405 def Run(self, name, validate=True, call_fn=None):
12406 """Run an instance allocator and return the results.
12409 if call_fn is None:
12410 call_fn = self.rpc.call_iallocator_runner
12412 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12413 result.Raise("Failure while running the iallocator script")
12415 self.out_text = result.payload
12416 if validate:
12417 self._ValidateResult()
12419 def _ValidateResult(self):
12420 """Process the allocator results.
12422 This will process and if successful save the result in
12423 self.out_data and the other parameters.
12426 try:
12427 rdict = serializer.Load(self.out_text)
12428 except Exception, err:
12429 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12431 if not isinstance(rdict, dict):
12432 raise errors.OpExecError("Can't parse iallocator results: not a dict")
12434 # TODO: remove backwards compatibility in later versions
12435 if "nodes" in rdict and "result" not in rdict:
12436 rdict["result"] = rdict["nodes"]
12437 del rdict["nodes"]
12439 for key in "success", "info", "result":
12440 if key not in rdict:
12441 raise errors.OpExecError("Can't parse iallocator results:"
12442 " missing key '%s'" % key)
12443 setattr(self, key, rdict[key])
12445 if not self._result_check(self.result):
12446 raise errors.OpExecError("Iallocator returned invalid result,"
12447 " expected %s, got %s" %
12448 (self._result_check, self.result),
12449 errors.ECODE_INVAL)
12451 if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12452 constants.IALLOCATOR_MODE_MEVAC):
12453 node2group = dict((name, ndata["group"])
12454 for (name, ndata) in self.in_data["nodes"].items())
12456 fn = compat.partial(self._NodesToGroups, node2group,
12457 self.in_data["nodegroups"])
12459 if self.mode == constants.IALLOCATOR_MODE_RELOC:
12460 assert self.relocate_from is not None
12461 assert self.required_nodes == 1
12463 request_groups = fn(self.relocate_from)
12464 result_groups = fn(rdict["result"])
12466 if result_groups != request_groups:
12467 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12468 " differ from original groups (%s)" %
12469 (utils.CommaJoin(result_groups),
12470 utils.CommaJoin(request_groups)))
12471 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12472 request_groups = fn(self.evac_nodes)
12473 for (instance_name, secnode) in self.result:
12474 result_groups = fn([secnode])
12475 if result_groups != request_groups:
12476 raise errors.OpExecError("Iallocator returned new secondary node"
12477 " '%s' (group '%s') for instance '%s'"
12478 " which is not in original group '%s'" %
12479 (secnode, utils.CommaJoin(result_groups),
12480 instance_name,
12481 utils.CommaJoin(request_groups)))
12482 else:
12483 raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12485 self.out_data = rdict
12487 @staticmethod
12488 def _NodesToGroups(node2group, groups, nodes):
12489 """Returns a list of unique group names for a list of nodes.
12491 @type node2group: dict
12492 @param node2group: Map from node name to group UUID
12494 @param groups: Group information
12496 @param nodes: Node names
12499 result = set()
12501 for node in nodes:
12502 try:
12503 group_uuid = node2group[node]
12504 except KeyError:
12505 # Ignore unknown node
12506 continue
12508 try:
12509 group = groups[group_uuid]
12510 except KeyError:
12511 # Can't find group, let's use UUID
12512 group_name = group_uuid
12513 else:
12514 group_name = group["name"]
12516 result.add(group_name)
12518 return sorted(result)
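# Small usage sketch for the helper above (hypothetical data): unknown nodes
# are ignored and unknown groups fall back to their UUID.
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   # IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "x"])
#   # -> ["default", "rack2"]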
12521 class LUTestAllocator(NoHooksLU):
12522 """Run allocator tests.
12524 This LU runs the allocator tests
12527 def CheckPrereq(self):
12528 """Check prerequisites.
12530 This checks the opcode parameters depending on the director and mode test.
12533 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12534 for attr in ["memory", "disks", "disk_template",
12535 "os", "tags", "nics", "vcpus"]:
12536 if not hasattr(self.op, attr):
12537 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12538 attr, errors.ECODE_INVAL)
12539 iname = self.cfg.ExpandInstanceName(self.op.name)
12540 if iname is not None:
12541 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12542 iname, errors.ECODE_EXISTS)
12543 if not isinstance(self.op.nics, list):
12544 raise errors.OpPrereqError("Invalid parameter 'nics'",
12545 errors.ECODE_INVAL)
12546 if not isinstance(self.op.disks, list):
12547 raise errors.OpPrereqError("Invalid parameter 'disks'",
12548 errors.ECODE_INVAL)
12549 for row in self.op.disks:
12550 if (not isinstance(row, dict) or
12551 constants.IDISK_SIZE not in row or
12552 not isinstance(row[constants.IDISK_SIZE], int) or
12553 constants.IDISK_MODE not in row or
12554 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
12555 raise errors.OpPrereqError("Invalid contents of the 'disks'"
12556 " parameter", errors.ECODE_INVAL)
12557 if self.op.hypervisor is None:
12558 self.op.hypervisor = self.cfg.GetHypervisorType()
12559 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12560 fname = _ExpandInstanceName(self.cfg, self.op.name)
12561 self.op.name = fname
12562 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12563 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12564 if not hasattr(self.op, "evac_nodes"):
12565 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12566 " opcode input", errors.ECODE_INVAL)
12567 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
12568 constants.IALLOCATOR_MODE_NODE_EVAC):
12569 if not self.op.instances:
12570 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
12571 self.op.instances = _GetWantedInstances(self, self.op.instances)
12573 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12574 self.op.mode, errors.ECODE_INVAL)
12576 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12577 if self.op.allocator is None:
12578 raise errors.OpPrereqError("Missing allocator name",
12579 errors.ECODE_INVAL)
12580 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12581 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12582 self.op.direction, errors.ECODE_INVAL)
12584 def Exec(self, feedback_fn):
12585 """Run the allocator test.
12588 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12589 ial = IAllocator(self.cfg, self.rpc,
12592 memory=self.op.memory,
12593 disks=self.op.disks,
12594 disk_template=self.op.disk_template,
12598 vcpus=self.op.vcpus,
12599 hypervisor=self.op.hypervisor,
12601 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12602 ial = IAllocator(self.cfg, self.rpc,
12605 relocate_from=list(self.relocate_from),
12607 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12608 ial = IAllocator(self.cfg, self.rpc,
12610 evac_nodes=self.op.evac_nodes)
12611 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
12612 ial = IAllocator(self.cfg, self.rpc,
12614 instances=self.op.instances,
12615 target_groups=self.op.target_groups)
12616 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
12617 ial = IAllocator(self.cfg, self.rpc,
12619 instances=self.op.instances,
12620 evac_mode=self.op.evac_mode)
12622 raise errors.ProgrammerError("Uncatched mode %s in"
12623 " LUTestAllocator.Exec", self.op.mode)
12625 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12626 result = ial.in_text
12627 else:
12628 ial.Run(self.op.allocator, validate=False)
12629 result = ial.out_text
12630 return result
12633 #: Query type implementations
12634 _QUERY_IMPL = {
12635 constants.QR_INSTANCE: _InstanceQuery,
12636 constants.QR_NODE: _NodeQuery,
12637 constants.QR_GROUP: _GroupQuery,
12638 constants.QR_OS: _OsQuery,
12639 }
12641 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12644 def _GetQueryImplementation(name):
12645 """Returns the implemtnation for a query type.
12647 @param name: Query type, must be one of L{constants.QR_VIA_OP}
12650 try:
12651 return _QUERY_IMPL[name]
12652 except KeyError:
12653 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12654 errors.ECODE_INVAL)