code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
# C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60 from ganeti import ht
  61
  62 import ganeti.masterd.instance # pylint: disable-msg=W0611
  63
  64
  65 def _SupportsOob(cfg, node):
  66   """Tells if node supports OOB.
  67
  68   @type cfg: L{config.ConfigWriter}
  69   @param cfg: The cluster configuration
  70   @type node: L{objects.Node}
  71   @param node: The node
  72   @return: The OOB script if supported or an empty string otherwise
  73
  74   """
  75   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
 100 class LogicalUnit(object):
 101   """Logical Unit base class.
 102
 103   Subclasses must follow these rules:
 104     - implement ExpandNames
 105     - implement CheckPrereq (except when tasklets are used)
 106     - implement Exec (except when tasklets are used)
 107     - implement BuildHooksEnv
 108     - implement BuildHooksNodes
 109     - redefine HPATH and HTYPE
 110     - optionally redefine their run requirements:
 111         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 112
 113   Note that all commands require root permissions.
 114
 115   @ivar dry_run_result: the value (if any) that will be returned to the caller
 116       in dry-run mode (signalled by opcode dry_run parameter)
 117
 118   """
 119   HPATH = None
 120   HTYPE = None
 121   REQ_BGL = True
 122
 123   def __init__(self, processor, op, context, rpc):
 124     """Constructor for LogicalUnit.
 125
 126     This needs to be overridden in derived classes in order to check op
 127     validity.
 128
 129     """
 130     self.proc = processor
 131     self.op = op
 132     self.cfg = context.cfg
 133     self.glm = context.glm
 134     self.context = context
 135     self.rpc = rpc
 136     # Dicts used to declare locking needs to mcpu
 137     self.needed_locks = None
 138     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 139     self.add_locks = {}
 140     self.remove_locks = {}
 141     # Used to force good behavior when calling helper functions
 142     self.recalculate_locks = {}
 143     # logging
 144     self.Log = processor.Log # pylint: disable-msg=C0103
 145     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 146     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 147     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 148     # support for dry-run
 149     self.dry_run_result = None
 150     # support for generic debug attribute
 151     if (not hasattr(self.op, "debug_level") or
 152         not isinstance(self.op.debug_level, int)):
 153       self.op.debug_level = 0
 154
 155     # Tasklets
 156     self.tasklets = None
 157
 158     # Validate opcode parameters and set defaults
 159     self.op.Validate(True)
 160
 161     self.CheckArguments()
 162
 163   def CheckArguments(self):
 164     """Check syntactic validity for the opcode arguments.
 165
 166     This method is for doing a simple syntactic check and ensure
 167     validity of opcode parameters, without any cluster-related
 168     checks. While the same can be accomplished in ExpandNames and/or
 169     CheckPrereq, doing these separate is better because:
 170
 171       - ExpandNames is left as as purely a lock-related function
 172       - CheckPrereq is run after we have acquired locks (and possible
 173         waited for them)
 174
 175     The function is allowed to change the self.op attribute so that
 176     later methods can no longer worry about missing parameters.
 177
 178     """
 179     pass
 180
 181   def ExpandNames(self):
 182     """Expand names for this LU.
 183
 184     This method is called before starting to execute the opcode, and it should
 185     update all the parameters of the opcode to their canonical form (e.g. a
 186     short node name must be fully expanded after this method has successfully
 187     completed). This way locking, hooks, logging, etc. can work correctly.
 188
 189     LUs which implement this method must also populate the self.needed_locks
 190     member, as a dict with lock levels as keys, and a list of needed lock names
 191     as values. Rules:
 192
 193       - use an empty dict if you don't need any lock
 194       - if you don't need any lock at a particular level omit that level
 195       - don't put anything for the BGL level
 196       - if you want all locks at a level use locking.ALL_SET as a value
 197
 198     If you need to share locks (rather than acquire them exclusively) at one
 199     level you can modify self.share_locks, setting a true value (usually 1) for
 200     that level. By default locks are not shared.
 201
 202     This function can also define a list of tasklets, which then will be
 203     executed in order instead of the usual LU-level CheckPrereq and Exec
 204     functions, if those are not defined by the LU.
 205
 206     Examples::
 207
 208       # Acquire all nodes and one instance
 209       self.needed_locks = {
 210         locking.LEVEL_NODE: locking.ALL_SET,
 211         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 212       }
 213       # Acquire just two nodes
 214       self.needed_locks = {
 215         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 216       }
 217       # Acquire no locks
 218       self.needed_locks = {} # No, you can't leave it to the default value None
 219
 220     """
 221     # The implementation of this method is mandatory only if the new LU is
 222     # concurrent, so that old LUs don't need to be changed all at the same
 223     # time.
 224     if self.REQ_BGL:
 225       self.needed_locks = {} # Exclusive LUs don't need locks.
 226     else:
 227       raise NotImplementedError
 228
 229   def DeclareLocks(self, level):
 230     """Declare LU locking needs for a level
 231
 232     While most LUs can just declare their locking needs at ExpandNames time,
 233     sometimes there's the need to calculate some locks after having acquired
 234     the ones before. This function is called just before acquiring locks at a
 235     particular level, but after acquiring the ones at lower levels, and permits
 236     such calculations. It can be used to modify self.needed_locks, and by
 237     default it does nothing.
 238
 239     This function is only called if you have something already set in
 240     self.needed_locks for the level.
 241
 242     @param level: Locking level which is going to be locked
 243     @type level: member of ganeti.locking.LEVELS
 244
 245     """
 246
 247   def CheckPrereq(self):
 248     """Check prerequisites for this LU.
 249
 250     This method should check that the prerequisites for the execution
 251     of this LU are fulfilled. It can do internode communication, but
 252     it should be idempotent - no cluster or system changes are
 253     allowed.
 254
 255     The method should raise errors.OpPrereqError in case something is
 256     not fulfilled. Its return value is ignored.
 257
 258     This method should also update all the parameters of the opcode to
 259     their canonical form if it hasn't been done by ExpandNames before.
 260
 261     """
 262     if self.tasklets is not None:
 263       for (idx, tl) in enumerate(self.tasklets):
 264         logging.debug("Checking prerequisites for tasklet %s/%s",
 265                       idx + 1, len(self.tasklets))
 266         tl.CheckPrereq()
 267     else:
 268       pass
 269
 270   def Exec(self, feedback_fn):
 271     """Execute the LU.
 272
 273     This method should implement the actual work. It should raise
 274     errors.OpExecError for failures that are somewhat dealt with in
 275     code, or expected.
 276
 277     """
 278     if self.tasklets is not None:
 279       for (idx, tl) in enumerate(self.tasklets):
 280         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 281         tl.Exec(feedback_fn)
 282     else:
 283       raise NotImplementedError
 284
 285   def BuildHooksEnv(self):
 286     """Build hooks environment for this LU.
 287
 288     @rtype: dict
 289     @return: Dictionary containing the environment that will be used for
 290       running the hooks for this LU. The keys of the dict must not be prefixed
 291       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 292       will extend the environment with additional variables. If no environment
 293       should be defined, an empty dictionary should be returned (not C{None}).
 294     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 295       will not be called.
 296
 297     """
 298     raise NotImplementedError
 299
 300   def BuildHooksNodes(self):
 301     """Build list of nodes to run LU's hooks.
 302
 303     @rtype: tuple; (list, list)
 304     @return: Tuple containing a list of node names on which the hook
 305       should run before the execution and a list of node names on which the
 306       hook should run after the execution. No nodes should be returned as an
 307       empty list (and not None).
 308     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 309       will not be called.
 310
 311     """
 312     raise NotImplementedError
 313
 314   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 315     """Notify the LU about the results of its hooks.
 316
 317     This method is called every time a hooks phase is executed, and notifies
 318     the Logical Unit about the hooks' result. The LU can then use it to alter
 319     its result based on the hooks.  By default the method does nothing and the
 320     previous result is passed back unchanged but any LU can define it if it
 321     wants to use the local cluster hook-scripts somehow.
 322
 323     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 324         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 325     @param hook_results: the results of the multi-node hooks rpc call
 326     @param feedback_fn: function used send feedback back to the caller
 327     @param lu_result: the previous Exec result this LU had, or None
 328         in the PRE phase
 329     @return: the new Exec result, based on the previous result
 330         and hook results
 331
 332     """
 333     # API must be kept, thus we ignore the unused argument and could
 334     # be a function warnings
 335     # pylint: disable-msg=W0613,R0201
 336     return lu_result
 337
 338   def _ExpandAndLockInstance(self):
 339     """Helper function to expand and lock an instance.
 340
 341     Many LUs that work on an instance take its name in self.op.instance_name
 342     and need to expand it and then declare the expanded name for locking. This
 343     function does it, and then updates self.op.instance_name to the expanded
 344     name. It also initializes needed_locks as a dict, if this hasn't been done
 345     before.
 346
 347     """
 348     if self.needed_locks is None:
 349       self.needed_locks = {}
 350     else:
 351       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 352         "_ExpandAndLockInstance called with instance-level locks set"
 353     self.op.instance_name = _ExpandInstanceName(self.cfg,
 354                                                 self.op.instance_name)
 355     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 356
 357   def _LockInstancesNodes(self, primary_only=False):
 358     """Helper function to declare instances' nodes for locking.
 359
 360     This function should be called after locking one or more instances to lock
 361     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 362     with all primary or secondary nodes for instances already locked and
 363     present in self.needed_locks[locking.LEVEL_INSTANCE].
 364
 365     It should be called from DeclareLocks, and for safety only works if
 366     self.recalculate_locks[locking.LEVEL_NODE] is set.
 367
 368     In the future it may grow parameters to just lock some instance's nodes, or
 369     to just lock primaries or secondary nodes, if needed.
 370
 371     If should be called in DeclareLocks in a way similar to::
 372
 373       if level == locking.LEVEL_NODE:
 374         self._LockInstancesNodes()
 375
 376     @type primary_only: boolean
 377     @param primary_only: only lock primary nodes of locked instances
 378
 379     """
 380     assert locking.LEVEL_NODE in self.recalculate_locks, \
 381       "_LockInstancesNodes helper function called with no nodes to recalculate"
 382
 383     # TODO: check if we're really been called with the instance locks held
 384
 385     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 386     # future we might want to have different behaviors depending on the value
 387     # of self.recalculate_locks[locking.LEVEL_NODE]
 388     wanted_nodes = []
 389     for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
 390       instance = self.context.cfg.GetInstanceInfo(instance_name)
 391       wanted_nodes.append(instance.primary_node)
 392       if not primary_only:
 393         wanted_nodes.extend(instance.secondary_nodes)
 394
 395     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 396       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 397     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 398       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 399
 400     del self.recalculate_locks[locking.LEVEL_NODE]
 401
 402
 403 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 404   """Simple LU which runs no hooks.
 405
 406   This LU is intended as a parent for other LogicalUnits which will
 407   run no hooks, in order to reduce duplicate code.
 408
 409   """
 410   HPATH = None
 411   HTYPE = None
 412
 413   def BuildHooksEnv(self):
 414     """Empty BuildHooksEnv for NoHooksLu.
 415
 416     This just raises an error.
 417
 418     """
 419     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 420
 421   def BuildHooksNodes(self):
 422     """Empty BuildHooksNodes for NoHooksLU.
 423
 424     """
 425     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 426
 427
 428 class Tasklet:
 429   """Tasklet base class.
 430
 431   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 432   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 433   tasklets know nothing about locks.
 434
 435   Subclasses must follow these rules:
 436     - Implement CheckPrereq
 437     - Implement Exec
 438
 439   """
 440   def __init__(self, lu):
 441     self.lu = lu
 442
 443     # Shortcuts
 444     self.cfg = lu.cfg
 445     self.rpc = lu.rpc
 446
 447   def CheckPrereq(self):
 448     """Check prerequisites for this tasklets.
 449
 450     This method should check whether the prerequisites for the execution of
 451     this tasklet are fulfilled. It can do internode communication, but it
 452     should be idempotent - no cluster or system changes are allowed.
 453
 454     The method should raise errors.OpPrereqError in case something is not
 455     fulfilled. Its return value is ignored.
 456
 457     This method should also update all parameters to their canonical form if it
 458     hasn't been done before.
 459
 460     """
 461     pass
 462
 463   def Exec(self, feedback_fn):
 464     """Execute the tasklet.
 465
 466     This method should implement the actual work. It should raise
 467     errors.OpExecError for failures that are somewhat dealt with in code, or
 468     expected.
 469
 470     """
 471     raise NotImplementedError
 472
 473
 474 class _QueryBase:
 475   """Base for query utility classes.
 476
 477   """
 478   #: Attribute holding field definitions
 479   FIELDS = None
 480
 481   def __init__(self, filter_, fields, use_locking):
 482     """Initializes this class.
 483
 484     """
 485     self.use_locking = use_locking
 486
 487     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 488                              namefield="name")
 489     self.requested_data = self.query.RequestedData()
 490     self.names = self.query.RequestedNames()
 491
 492     # Sort only if no names were requested
 493     self.sort_by_name = not self.names
 494
 495     self.do_locking = None
 496     self.wanted = None
 497
 498   def _GetNames(self, lu, all_names, lock_level):
 499     """Helper function to determine names asked for in the query.
 500
 501     """
 502     if self.do_locking:
 503       names = lu.glm.list_owned(lock_level)
 504     else:
 505       names = all_names
 506
 507     if self.wanted == locking.ALL_SET:
 508       assert not self.names
 509       # caller didn't specify names, so ordering is not important
 510       return utils.NiceSort(names)
 511
 512     # caller specified names and we must keep the same order
 513     assert self.names
 514     assert not self.do_locking or lu.glm.is_owned(lock_level)
 515
 516     missing = set(self.wanted).difference(names)
 517     if missing:
 518       raise errors.OpExecError("Some items were removed before retrieving"
 519                                " their data: %s" % missing)
 520
 521     # Return expanded names
 522     return self.wanted
 523
 524   def ExpandNames(self, lu):
 525     """Expand names for this query.
 526
 527     See L{LogicalUnit.ExpandNames}.
 528
 529     """
 530     raise NotImplementedError()
 531
 532   def DeclareLocks(self, lu, level):
 533     """Declare locks for this query.
 534
 535     See L{LogicalUnit.DeclareLocks}.
 536
 537     """
 538     raise NotImplementedError()
 539
 540   def _GetQueryData(self, lu):
 541     """Collects all data for this query.
 542
 543     @return: Query data object
 544
 545     """
 546     raise NotImplementedError()
 547
 548   def NewStyleQuery(self, lu):
 549     """Collect data and execute query.
 550
 551     """
 552     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 553                                   sort_by_name=self.sort_by_name)
 554
 555   def OldStyleQuery(self, lu):
 556     """Collect data and execute query.
 557
 558     """
 559     return self.query.OldStyleQuery(self._GetQueryData(lu),
 560                                     sort_by_name=self.sort_by_name)
 561
 562
 563 def _GetWantedNodes(lu, nodes):
 564   """Returns list of checked and expanded node names.
 565
 566   @type lu: L{LogicalUnit}
 567   @param lu: the logical unit on whose behalf we execute
 568   @type nodes: list
 569   @param nodes: list of node names or None for all nodes
 570   @rtype: list
 571   @return: the list of nodes, sorted
 572   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 573
 574   """
 575   if nodes:
 576     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 577
 578   return utils.NiceSort(lu.cfg.GetNodeList())
 579
 580
 581 def _GetWantedInstances(lu, instances):
 582   """Returns list of checked and expanded instance names.
 583
 584   @type lu: L{LogicalUnit}
 585   @param lu: the logical unit on whose behalf we execute
 586   @type instances: list
 587   @param instances: list of instance names or None for all instances
 588   @rtype: list
 589   @return: the list of instances, sorted
 590   @raise errors.OpPrereqError: if the instances parameter is wrong type
 591   @raise errors.OpPrereqError: if any of the passed instances is not found
 592
 593   """
 594   if instances:
 595     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 596   else:
 597     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 598   return wanted
 599
 600
 601 def _GetUpdatedParams(old_params, update_dict,
 602                       use_default=True, use_none=False):
 603   """Return the new version of a parameter dictionary.
 604
 605   @type old_params: dict
 606   @param old_params: old parameters
 607   @type update_dict: dict
 608   @param update_dict: dict containing new parameter values, or
 609       constants.VALUE_DEFAULT to reset the parameter to its default
 610       value
 611   @param use_default: boolean
 612   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 613       values as 'to be deleted' values
 614   @param use_none: boolean
 615   @type use_none: whether to recognise C{None} values as 'to be
 616       deleted' values
 617   @rtype: dict
 618   @return: the new parameter dictionary
 619
 620   """
 621   params_copy = copy.deepcopy(old_params)
 622   for key, val in update_dict.iteritems():
 623     if ((use_default and val == constants.VALUE_DEFAULT) or
 624         (use_none and val is None)):
 625       try:
 626         del params_copy[key]
 627       except KeyError:
 628         pass
 629     else:
 630       params_copy[key] = val
 631   return params_copy
 632
 633
 634 def _ReleaseLocks(lu, level, names=None, keep=None):
 635   """Releases locks owned by an LU.
 636
 637   @type lu: L{LogicalUnit}
 638   @param level: Lock level
 639   @type names: list or None
 640   @param names: Names of locks to release
 641   @type keep: list or None
 642   @param keep: Names of locks to retain
 643
 644   """
 645   assert not (keep is not None and names is not None), \
 646          "Only one of the 'names' and the 'keep' parameters can be given"
 647
 648   if names is not None:
 649     should_release = names.__contains__
 650   elif keep:
 651     should_release = lambda name: name not in keep
 652   else:
 653     should_release = None
 654
 655   if should_release:
 656     retain = []
 657     release = []
 658
 659     # Determine which locks to release
 660     for name in lu.glm.list_owned(level):
 661       if should_release(name):
 662         release.append(name)
 663       else:
 664         retain.append(name)
 665
 666     assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
 667
 668     # Release just some locks
 669     lu.glm.release(level, names=release)
 670
 671     assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
 672   else:
 673     # Release everything
 674     lu.glm.release(level)
 675
 676     assert not lu.glm.is_owned(level), "No locks should be owned"
 677
 678
 679 def _RunPostHook(lu, node_name):
 680   """Runs the post-hook for an opcode on a single node.
 681
 682   """
 683   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 684   try:
 685     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 686   except:
 687     # pylint: disable-msg=W0702
 688     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 689
 690
 691 def _CheckOutputFields(static, dynamic, selected):
 692   """Checks whether all selected fields are valid.
 693
 694   @type static: L{utils.FieldSet}
 695   @param static: static fields set
 696   @type dynamic: L{utils.FieldSet}
 697   @param dynamic: dynamic fields set
 698
 699   """
 700   f = utils.FieldSet()
 701   f.Extend(static)
 702   f.Extend(dynamic)
 703
 704   delta = f.NonMatching(selected)
 705   if delta:
 706     raise errors.OpPrereqError("Unknown output fields selected: %s"
 707                                % ",".join(delta), errors.ECODE_INVAL)
 708
 709
 710 def _CheckGlobalHvParams(params):
 711   """Validates that given hypervisor params are not global ones.
 712
 713   This will ensure that instances don't get customised versions of
 714   global params.
 715
 716   """
 717   used_globals = constants.HVC_GLOBALS.intersection(params)
 718   if used_globals:
 719     msg = ("The following hypervisor parameters are global and cannot"
 720            " be customized at instance level, please modify them at"
 721            " cluster level: %s" % utils.CommaJoin(used_globals))
 722     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 723
 724
 725 def _CheckNodeOnline(lu, node, msg=None):
 726   """Ensure that a given node is online.
 727
 728   @param lu: the LU on behalf of which we make the check
 729   @param node: the node to check
 730   @param msg: if passed, should be a message to replace the default one
 731   @raise errors.OpPrereqError: if the node is offline
 732
 733   """
 734   if msg is None:
 735     msg = "Can't use offline node"
 736   if lu.cfg.GetNodeInfo(node).offline:
 737     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 738
 739
 740 def _CheckNodeNotDrained(lu, node):
 741   """Ensure that a given node is not drained.
 742
 743   @param lu: the LU on behalf of which we make the check
 744   @param node: the node to check
 745   @raise errors.OpPrereqError: if the node is drained
 746
 747   """
 748   if lu.cfg.GetNodeInfo(node).drained:
 749     raise errors.OpPrereqError("Can't use drained node %s" % node,
 750                                errors.ECODE_STATE)
 751
 752
 753 def _CheckNodeVmCapable(lu, node):
 754   """Ensure that a given node is vm capable.
 755
 756   @param lu: the LU on behalf of which we make the check
 757   @param node: the node to check
 758   @raise errors.OpPrereqError: if the node is not vm capable
 759
 760   """
 761   if not lu.cfg.GetNodeInfo(node).vm_capable:
 762     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 763                                errors.ECODE_STATE)
 764
 765
 766 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 767   """Ensure that a node supports a given OS.
 768
 769   @param lu: the LU on behalf of which we make the check
 770   @param node: the node to check
 771   @param os_name: the OS to query about
 772   @param force_variant: whether to ignore variant errors
 773   @raise errors.OpPrereqError: if the node is not supporting the OS
 774
 775   """
 776   result = lu.rpc.call_os_get(node, os_name)
 777   result.Raise("OS '%s' not in supported OS list for node %s" %
 778                (os_name, node),
 779                prereq=True, ecode=errors.ECODE_INVAL)
 780   if not force_variant:
 781     _CheckOSVariant(result.payload, os_name)
 782
 783
 784 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 785   """Ensure that a node has the given secondary ip.
 786
 787   @type lu: L{LogicalUnit}
 788   @param lu: the LU on behalf of which we make the check
 789   @type node: string
 790   @param node: the node to check
 791   @type secondary_ip: string
 792   @param secondary_ip: the ip to check
 793   @type prereq: boolean
 794   @param prereq: whether to throw a prerequisite or an execute error
 795   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 796   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 797
 798   """
 799   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 800   result.Raise("Failure checking secondary ip on node %s" % node,
 801                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 802   if not result.payload:
 803     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 804            " please fix and re-run this command" % secondary_ip)
 805     if prereq:
 806       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 807     else:
 808       raise errors.OpExecError(msg)
 809
 810
 811 def _GetClusterDomainSecret():
 812   """Reads the cluster domain secret.
 813
 814   """
 815   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 816                                strict=True)
 817
 818
 819 def _CheckInstanceDown(lu, instance, reason):
 820   """Ensure that an instance is not running."""
 821   if instance.admin_up:
 822     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 823                                (instance.name, reason), errors.ECODE_STATE)
 824
 825   pnode = instance.primary_node
 826   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 827   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 828               prereq=True, ecode=errors.ECODE_ENVIRON)
 829
 830   if instance.name in ins_l.payload:
 831     raise errors.OpPrereqError("Instance %s is running, %s" %
 832                                (instance.name, reason), errors.ECODE_STATE)
 833
 834
 835 def _ExpandItemName(fn, name, kind):
 836   """Expand an item name.
 837
 838   @param fn: the function to use for expansion
 839   @param name: requested item name
 840   @param kind: text description ('Node' or 'Instance')
 841   @return: the resolved (full) name
 842   @raise errors.OpPrereqError: if the item is not found
 843
 844   """
 845   full_name = fn(name)
 846   if full_name is None:
 847     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 848                                errors.ECODE_NOENT)
 849   return full_name
 850
 851
 852 def _ExpandNodeName(cfg, name):
 853   """Wrapper over L{_ExpandItemName} for nodes."""
 854   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 855
 856
 857 def _ExpandInstanceName(cfg, name):
 858   """Wrapper over L{_ExpandItemName} for instance."""
 859   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 860
 861
 862 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 863                           memory, vcpus, nics, disk_template, disks,
 864                           bep, hvp, hypervisor_name, tags):
 865   """Builds instance related env variables for hooks
 866
 867   This builds the hook environment from individual variables.
 868
 869   @type name: string
 870   @param name: the name of the instance
 871   @type primary_node: string
 872   @param primary_node: the name of the instance's primary node
 873   @type secondary_nodes: list
 874   @param secondary_nodes: list of secondary nodes as strings
 875   @type os_type: string
 876   @param os_type: the name of the instance's OS
 877   @type status: boolean
 878   @param status: the should_run status of the instance
 879   @type memory: string
 880   @param memory: the memory size of the instance
 881   @type vcpus: string
 882   @param vcpus: the count of VCPUs the instance has
 883   @type nics: list
 884   @param nics: list of tuples (ip, mac, mode, link) representing
 885       the NICs the instance has
 886   @type disk_template: string
 887   @param disk_template: the disk template of the instance
 888   @type disks: list
 889   @param disks: the list of (size, mode) pairs
 890   @type bep: dict
 891   @param bep: the backend parameters for the instance
 892   @type hvp: dict
 893   @param hvp: the hypervisor parameters for the instance
 894   @type hypervisor_name: string
 895   @param hypervisor_name: the hypervisor for the instance
 896   @type tags: list
 897   @param tags: list of instance tags as strings
 898   @rtype: dict
 899   @return: the hook environment for this instance
 900
 901   """
 902   if status:
 903     str_status = "up"
 904   else:
 905     str_status = "down"
 906   env = {
 907     "OP_TARGET": name,
 908     "INSTANCE_NAME": name,
 909     "INSTANCE_PRIMARY": primary_node,
 910     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 911     "INSTANCE_OS_TYPE": os_type,
 912     "INSTANCE_STATUS": str_status,
 913     "INSTANCE_MEMORY": memory,
 914     "INSTANCE_VCPUS": vcpus,
 915     "INSTANCE_DISK_TEMPLATE": disk_template,
 916     "INSTANCE_HYPERVISOR": hypervisor_name,
 917   }
 918
 919   if nics:
 920     nic_count = len(nics)
 921     for idx, (ip, mac, mode, link) in enumerate(nics):
 922       if ip is None:
 923         ip = ""
 924       env["INSTANCE_NIC%d_IP" % idx] = ip
 925       env["INSTANCE_NIC%d_MAC" % idx] = mac
 926       env["INSTANCE_NIC%d_MODE" % idx] = mode
 927       env["INSTANCE_NIC%d_LINK" % idx] = link
 928       if mode == constants.NIC_MODE_BRIDGED:
 929         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 930   else:
 931     nic_count = 0
 932
 933   env["INSTANCE_NIC_COUNT"] = nic_count
 934
 935   if disks:
 936     disk_count = len(disks)
 937     for idx, (size, mode) in enumerate(disks):
 938       env["INSTANCE_DISK%d_SIZE" % idx] = size
 939       env["INSTANCE_DISK%d_MODE" % idx] = mode
 940   else:
 941     disk_count = 0
 942
 943   env["INSTANCE_DISK_COUNT"] = disk_count
 944
 945   if not tags:
 946     tags = []
 947
 948   env["INSTANCE_TAGS"] = " ".join(tags)
 949
 950   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 951     for key, value in source.items():
 952       env["INSTANCE_%s_%s" % (kind, key)] = value
 953
 954   return env
 955
 956
 957 def _NICListToTuple(lu, nics):
 958   """Build a list of nic information tuples.
 959
 960   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 961   value in LUInstanceQueryData.
 962
 963   @type lu:  L{LogicalUnit}
 964   @param lu: the logical unit on whose behalf we execute
 965   @type nics: list of L{objects.NIC}
 966   @param nics: list of nics to convert to hooks tuples
 967
 968   """
 969   hooks_nics = []
 970   cluster = lu.cfg.GetClusterInfo()
 971   for nic in nics:
 972     ip = nic.ip
 973     mac = nic.mac
 974     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 975     mode = filled_params[constants.NIC_MODE]
 976     link = filled_params[constants.NIC_LINK]
 977     hooks_nics.append((ip, mac, mode, link))
 978   return hooks_nics
 979
 980
 981 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 982   """Builds instance related env variables for hooks from an object.
 983
 984   @type lu: L{LogicalUnit}
 985   @param lu: the logical unit on whose behalf we execute
 986   @type instance: L{objects.Instance}
 987   @param instance: the instance for which we should build the
 988       environment
 989   @type override: dict
 990   @param override: dictionary with key/values that will override
 991       our values
 992   @rtype: dict
 993   @return: the hook environment dictionary
 994
 995   """
 996   cluster = lu.cfg.GetClusterInfo()
 997   bep = cluster.FillBE(instance)
 998   hvp = cluster.FillHV(instance)
 999   args = {
1000     'name': instance.name,
1001     'primary_node': instance.primary_node,
1002     'secondary_nodes': instance.secondary_nodes,
1003     'os_type': instance.os,
1004     'status': instance.admin_up,
1005     'memory': bep[constants.BE_MEMORY],
1006     'vcpus': bep[constants.BE_VCPUS],
1007     'nics': _NICListToTuple(lu, instance.nics),
1008     'disk_template': instance.disk_template,
1009     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1010     'bep': bep,
1011     'hvp': hvp,
1012     'hypervisor_name': instance.hypervisor,
1013     'tags': instance.tags,
1014   }
1015   if override:
1016     args.update(override)
1017   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1018
1019
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  The nodes returned by C{MaintainCandidatePool} are logged as promoted
  and passed to C{lu.context.ReaddNode}. Afterwards a note is logged if
  more nodes are candidates than desired.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed through to the configuration calls

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now <= mc_max:
    return

  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
1034
1035
1036 def _DecideSelfPromotion(lu, exceptions=None):
1037   """Decide whether I should promote myself as a master candidate.
1038
1039   """
1040   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1041   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042   # the new node will increase mc_max with one, so:
1043   mc_should = min(mc_should + 1, cp_size)
1044   return mc_now < mc_should
1045
1046
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters are in bridged mode are considered;
  if there are none, the node is not contacted at all.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges are looked up

  """
  cluster = lu.cfg.GetClusterInfo()

  bridges = []
  for nic in target_nics:
    params = cluster.SimpleFillNIC(nic.nicparams)
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1059
1060
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; if None, the instance's primary
      node is used

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1068
1069
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  Nothing is checked if the OS object declares no supported variants.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name carries no variant, or the
      variant is not among the supported ones

  """
  supported = os_obj.supported_variants
  if not supported:
    return

  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant in supported:
    return

  raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1088
1089
1090 def _GetNodeInstancesInner(cfg, fn):
1091   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1092
1093
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: object providing C{GetAllInstancesInfo}
  @param node_name: the node to look for in each instance's C{all_nodes}

  """
  def _UsesNode(inst):
    """Tell whether the instance lists the node among all its nodes."""
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1100
1101
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: object providing C{GetAllInstancesInfo}
  @param node_name: the node compared with each instance's primary node

  """
  def _IsPrimary(inst):
    """Tell whether the node is the primary node of the instance."""
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1108
1109
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: object providing C{GetAllInstancesInfo}
  @param node_name: the node to look for in each instance's secondary
      nodes

  """
  def _IsSecondary(inst):
    """Tell whether the node is a secondary node of the instance."""
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1116
1117
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: object providing the file storage directory getters
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: for file storage, a list holding one list with the file and
      shared file storage directories; an empty list otherwise

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1128
1129
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Find the indices of an instance's disks reported faulty on a node.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc: RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to the result's C{Raise} method
  @rtype: list
  @return: indices of the disks whose local disk status is
      C{constants.LDS_FAULTY}

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # entries without a status are skipped
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1145
1146
1147 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1148   """Check the sanity of iallocator and node arguments and use the
1149   cluster-wide iallocator if appropriate.
1150
1151   Check that at most one of (iallocator, node) is specified. If none is
1152   specified, then the LU's opcode's iallocator slot is filled with the
1153   cluster-wide default iallocator.
1154
1155   @type iallocator_slot: string
1156   @param iallocator_slot: the name of the opcode iallocator slot
1157   @type node_slot: string
1158   @param node_slot: the name of the opcode target node slot
1159
1160   """
1161   node = getattr(lu.op, node_slot, None)
1162   iallocator = getattr(lu.op, iallocator_slot, None)
1163
1164   if node is not None and iallocator is not None:
1165     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1166                                errors.ECODE_INVAL)
1167   elif node is None and iallocator is None:
1168     default_iallocator = lu.cfg.GetDefaultIAllocator()
1169     if default_iallocator:
1170       setattr(lu.op, iallocator_slot, default_iallocator)
1171     else:
1172       raise errors.OpPrereqError("No iallocator or node given and no"
1173                                  " cluster-wide default iallocator found;"
1174                                  " please specify either an iallocator or a"
1175                                  " node, or set a cluster-wide default"
1176                                  " iallocator")
1177
1178
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The unit performs no work by itself; it only provides the hook
  environment and nodes.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding the cluster name as C{OP_TARGET}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of node lists: an empty one and one holding only
        the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always True

    """
    return True
1205
1206
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding the cluster name as C{OP_TARGET}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. the master is the only
    node left and no instances are configured.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()
    nodes = self.cfg.GetNodeList()

    only_master_left = (len(nodes) == 1 and nodes[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodes) - 1),
                                 errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    @param feedback_fn: unused
    @return: the master node, as returned by the configuration

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1262
1263
1264 def _VerifyCertificate(filename):
1265   """Verifies a certificate for L{LUClusterVerifyConfig}.
1266
1267   @type filename: string
1268   @param filename: Path to PEM file
1269
1270   """
1271   try:
1272     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1273                                            utils.ReadFile(filename))
1274   except Exception, err: # pylint: disable-msg=W0703
1275     return (LUClusterVerifyConfig.ETYPE_ERROR,
1276             "Failed to load X509 certificate %s: %s" % (filename, err))
1277
1278   (errcode, msg) = \
1279     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1280                                 constants.SSL_CERT_EXPIRATION_ERROR)
1281
1282   if msg:
1283     fnamemsg = "While verifying %s: %s" % (filename, msg)
1284   else:
1285     fnamemsg = None
1286
1287   if errcode is None:
1288     return (None, fnamemsg)
1289   elif errcode == utils.CERT_WARNING:
1290     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1291   elif errcode == utils.CERT_ERROR:
1292     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1293
1294   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1295
1296
1297 def _GetAllHypervisorParameters(cluster, instances):
1298   """Compute the set of all hypervisor parameters.
1299
1300   @type cluster: L{objects.Cluster}
1301   @param cluster: the cluster object
1302   @param instances: list of L{objects.Instance}
1303   @param instances: additional instances from which to obtain parameters
1304   @rtype: list of (origin, hypervisor, parameters)
1305   @return: a list with all parameters found, indicating the hypervisor they
1306        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1307
1308   """
1309   hvp_data = []
1310
1311   for hv_name in cluster.enabled_hypervisors:
1312     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1313
1314   for os_name, os_hvp in cluster.os_hvp.items():
1315     for hv_name, hv_params in os_hvp.items():
1316       if hv_params:
1317         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1318         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1319
1320   # TODO: collapse identical parameter values in a single one
1321   for instance in instances:
1322     if instance.hvparams:
1323       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1324                        cluster.FillHV(instance)))
1325
1326   return hvp_data
1327
1328
1329 class _VerifyErrors(object):
1330   """Mix-in for cluster/group verify LUs.
1331
1332   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1333   self.op and self._feedback_fn to be available.)
1334
1335   """
1336   TCLUSTER = "cluster"
1337   TNODE = "node"
1338   TINSTANCE = "instance"
1339
1340   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1341   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1342   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1343   ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1344   ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1345   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1346   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1347   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1348   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1349   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1350   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1351   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1352   ENODEDRBD = (TNODE, "ENODEDRBD")
1353   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1354   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1355   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1356   ENODEHV = (TNODE, "ENODEHV")
1357   ENODELVM = (TNODE, "ENODELVM")
1358   ENODEN1 = (TNODE, "ENODEN1")
1359   ENODENET = (TNODE, "ENODENET")
1360   ENODEOS = (TNODE, "ENODEOS")
1361   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1362   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1363   ENODERPC = (TNODE, "ENODERPC")
1364   ENODESSH = (TNODE, "ENODESSH")
1365   ENODEVERSION = (TNODE, "ENODEVERSION")
1366   ENODESETUP = (TNODE, "ENODESETUP")
1367   ENODETIME = (TNODE, "ENODETIME")
1368   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1369
1370   ETYPE_FIELD = "code"
1371   ETYPE_ERROR = "ERROR"
1372   ETYPE_WARNING = "WARNING"
1373
1374   def _Error(self, ecode, item, msg, *args, **kwargs):
1375     """Format an error message.
1376
1377     Based on the opcode's error_codes parameter, either format a
1378     parseable error code, or a simpler error string.
1379
1380     This must be called only from Exec and functions called from Exec.
1381
1382     """
1383     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1384     itype, etxt = ecode
1385     # first complete the msg
1386     if args:
1387       msg = msg % args
1388     # then format the whole message
1389     if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1390       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1391     else:
1392       if item:
1393         item = " " + item
1394       else:
1395         item = ""
1396       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1397     # and finally report it via the feedback_fn
1398     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1399
1400   def _ErrorIf(self, cond, *args, **kwargs):
1401     """Log an error message if the passed condition is True.
1402
1403     """
1404     cond = (bool(cond)
1405             or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1406     if cond:
1407       self._Error(*args, **kwargs)
1408     # do not mark the operation as failed for WARN cases only
1409     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1410       self.bad = self.bad or cond
1411
1412
1413 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1414   """Verifies the cluster config.
1415
1416   """
1417   REQ_BGL = True
1418
1419   def _VerifyHVP(self, hvp_data):
1420     """Verifies locally the syntax of the hypervisor parameters.
1421
1422     """
1423     for item, hv_name, hv_params in hvp_data:
1424       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1425              (item, hv_name))
1426       try:
1427         hv_class = hypervisor.GetHypervisor(hv_name)
1428         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1429         hv_class.CheckParameterSyntax(hv_params)
1430       except errors.GenericError, err:
1431         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1432
1433   def ExpandNames(self):
1434     # Information can be safely retrieved as the BGL is acquired in exclusive
1435     # mode
1436     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1437     self.all_node_info = self.cfg.GetAllNodesInfo()
1438     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1439     self.needed_locks = {}
1440
1441   def Exec(self, feedback_fn):
1442     """Verify integrity of cluster, performing various test on nodes.
1443
1444     """
1445     self.bad = False
1446     self._feedback_fn = feedback_fn
1447
1448     feedback_fn("* Verifying cluster config")
1449
1450     for msg in self.cfg.VerifyConfig():
1451       self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1452
1453     feedback_fn("* Verifying cluster certificate files")
1454
1455     for cert_filename in constants.ALL_CERT_FILES:
1456       (errcode, msg) = _VerifyCertificate(cert_filename)
1457       self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1458
1459     feedback_fn("* Verifying hypervisor parameters")
1460
1461     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1462                                                 self.all_inst_info.values()))
1463
1464     feedback_fn("* Verifying all nodes belong to an existing group")
1465
1466     # We do this verification here because, should this bogus circumstance
1467     # occur, it would never be caught by VerifyGroup, which only acts on
1468     # nodes/instances reachable from existing node groups.
1469
1470     dangling_nodes = set(node.name for node in self.all_node_info.values()
1471                          if node.group not in self.all_group_info)
1472
1473     dangling_instances = {}
1474     no_node_instances = []
1475
1476     for inst in self.all_inst_info.values():
1477       if inst.primary_node in dangling_nodes:
1478         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1479       elif inst.primary_node not in self.all_node_info:
1480         no_node_instances.append(inst.name)
1481
1482     pretty_dangling = [
1483         "%s (%s)" %
1484         (node.name,
1485          utils.CommaJoin(dangling_instances.get(node.name,
1486                                                 ["no instances"])))
1487         for node in dangling_nodes]
1488
1489     self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1490                   "the following nodes (and their instances) belong to a non"
1491                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1492
1493     self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1494                   "the following instances have a non-existing primary-node:"
1495                   " %s", utils.CommaJoin(no_node_instances))
1496
1497     return (not self.bad, [g.name for g in self.all_group_info.values()])
1498
1499
1500 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1501   """Verifies the status of a node group.
1502
1503   """
1504   HPATH = "cluster-verify"
1505   HTYPE = constants.HTYPE_CLUSTER
1506   REQ_BGL = False
1507
1508   _HOOKS_INDENT_RE = re.compile("^", re.M)
1509
1510   class NodeImage(object):
1511     """A class representing the logical and physical status of a node.
1512
1513     @type name: string
1514     @ivar name: the node name to which this object refers
1515     @ivar volumes: a structure as returned from
1516         L{ganeti.backend.GetVolumeList} (runtime)
1517     @ivar instances: a list of running instances (runtime)
1518     @ivar pinst: list of configured primary instances (config)
1519     @ivar sinst: list of configured secondary instances (config)
1520     @ivar sbp: dictionary of {primary-node: list of instances} for all
1521         instances for which this node is secondary (config)
1522     @ivar mfree: free memory, as reported by hypervisor (runtime)
1523     @ivar dfree: free disk, as reported by the node (runtime)
1524     @ivar offline: the offline status (config)
1525     @type rpc_fail: boolean
1526     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1527         not whether the individual keys were correct) (runtime)
1528     @type lvm_fail: boolean
1529     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1530     @type hyp_fail: boolean
1531     @ivar hyp_fail: whether the RPC call didn't return the instance list
1532     @type ghost: boolean
1533     @ivar ghost: whether this is a known node or not (config)
1534     @type os_fail: boolean
1535     @ivar os_fail: whether the RPC call didn't return valid OS data
1536     @type oslist: list
1537     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1538     @type vm_capable: boolean
1539     @ivar vm_capable: whether the node can host instances
1540
1541     """
1542     def __init__(self, offline=False, name=None, vm_capable=True):
1543       self.name = name
1544       self.volumes = {}
1545       self.instances = []
1546       self.pinst = []
1547       self.sinst = []
1548       self.sbp = {}
1549       self.mfree = 0
1550       self.dfree = 0
1551       self.offline = offline
1552       self.vm_capable = vm_capable
1553       self.rpc_fail = False
1554       self.lvm_fail = False
1555       self.hyp_fail = False
1556       self.ghost = False
1557       self.os_fail = False
1558       self.oslist = {}
1559
1560   def ExpandNames(self):
1561     # This raises errors.OpPrereqError on its own:
1562     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1563
1564     # Get instances in node group; this is unsafe and needs verification later
1565     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1566
1567     self.needed_locks = {
1568       locking.LEVEL_INSTANCE: inst_names,
1569       locking.LEVEL_NODEGROUP: [self.group_uuid],
1570       locking.LEVEL_NODE: [],
1571       }
1572
1573     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1574
1575   def DeclareLocks(self, level):
1576     if level == locking.LEVEL_NODE:
1577       # Get members of node group; this is unsafe and needs verification later
1578       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1579
1580       all_inst_info = self.cfg.GetAllInstancesInfo()
1581
1582       # In Exec(), we warn about mirrored instances that have primary and
1583       # secondary living in separate node groups. To fully verify that
1584       # volumes for these instances are healthy, we will need to do an
1585       # extra call to their secondaries. We ensure here those nodes will
1586       # be locked.
1587       for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1588         # Important: access only the instances whose lock is owned
1589         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1590           nodes.update(all_inst_info[inst].secondary_nodes)
1591
1592       self.needed_locks[locking.LEVEL_NODE] = nodes
1593
1594   def CheckPrereq(self):
1595     group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1596     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1597
1598     unlocked_nodes = \
1599         group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1600
1601     unlocked_instances = \
1602         group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1603
1604     if unlocked_nodes:
1605       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1606                                  utils.CommaJoin(unlocked_nodes))
1607
1608     if unlocked_instances:
1609       raise errors.OpPrereqError("Missing lock for instances: %s" %
1610                                  utils.CommaJoin(unlocked_instances))
1611
1612     self.all_node_info = self.cfg.GetAllNodesInfo()
1613     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1614
1615     self.my_node_names = utils.NiceSort(group_nodes)
1616     self.my_inst_names = utils.NiceSort(group_instances)
1617
1618     self.my_node_info = dict((name, self.all_node_info[name])
1619                              for name in self.my_node_names)
1620
1621     self.my_inst_info = dict((name, self.all_inst_info[name])
1622                              for name in self.my_inst_names)
1623
1624     # We detect here the nodes that will need the extra RPC calls for verifying
1625     # split LV volumes; they should be locked.
1626     extra_lv_nodes = set()
1627
1628     for inst in self.my_inst_info.values():
1629       if inst.disk_template in constants.DTS_INT_MIRROR:
1630         group = self.my_node_info[inst.primary_node].group
1631         for nname in inst.secondary_nodes:
1632           if self.all_node_info[nname].group != group:
1633             extra_lv_nodes.add(nname)
1634
1635     unlocked_lv_nodes = \
1636         extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1637
1638     if unlocked_lv_nodes:
1639       raise errors.OpPrereqError("these nodes could be locked: %s" %
1640                                  utils.CommaJoin(unlocked_lv_nodes))
1641     self.extra_lv_nodes = list(extra_lv_nodes)
1642
1643   def _VerifyNode(self, ninfo, nresult):
1644     """Perform some basic validation on data returned from a node.
1645
1646       - check the result data structure is well formed and has all the
1647         mandatory fields
1648       - check ganeti version
1649
1650     @type ninfo: L{objects.Node}
1651     @param ninfo: the node to check
1652     @param nresult: the results from the node
1653     @rtype: boolean
1654     @return: whether overall this call was successful (and we can expect
1655          reasonable values in the respose)
1656
1657     """
1658     node = ninfo.name
1659     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660
1661     # main result, nresult should be a non-empty dict
1662     test = not nresult or not isinstance(nresult, dict)
1663     _ErrorIf(test, self.ENODERPC, node,
1664                   "unable to verify node: no data returned")
1665     if test:
1666       return False
1667
1668     # compares ganeti version
1669     local_version = constants.PROTOCOL_VERSION
1670     remote_version = nresult.get("version", None)
1671     test = not (remote_version and
1672                 isinstance(remote_version, (list, tuple)) and
1673                 len(remote_version) == 2)
1674     _ErrorIf(test, self.ENODERPC, node,
1675              "connection to node returned invalid data")
1676     if test:
1677       return False
1678
1679     test = local_version != remote_version[0]
1680     _ErrorIf(test, self.ENODEVERSION, node,
1681              "incompatible protocol versions: master %s,"
1682              " node %s", local_version, remote_version[0])
1683     if test:
1684       return False
1685
1686     # node seems compatible, we can actually try to look into its results
1687
1688     # full package version
1689     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1690                   self.ENODEVERSION, node,
1691                   "software version mismatch: master %s, node %s",
1692                   constants.RELEASE_VERSION, remote_version[1],
1693                   code=self.ETYPE_WARNING)
1694
1695     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1696     if ninfo.vm_capable and isinstance(hyp_result, dict):
1697       for hv_name, hv_result in hyp_result.iteritems():
1698         test = hv_result is not None
1699         _ErrorIf(test, self.ENODEHV, node,
1700                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1701
1702     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1703     if ninfo.vm_capable and isinstance(hvp_result, list):
1704       for item, hv_name, hv_result in hvp_result:
1705         _ErrorIf(True, self.ENODEHV, node,
1706                  "hypervisor %s parameter verify failure (source %s): %s",
1707                  hv_name, item, hv_result)
1708
1709     test = nresult.get(constants.NV_NODESETUP,
1710                        ["Missing NODESETUP results"])
1711     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1712              "; ".join(test))
1713
1714     return True
1715
1716   def _VerifyNodeTime(self, ninfo, nresult,
1717                       nvinfo_starttime, nvinfo_endtime):
1718     """Check the node time.
1719
1720     @type ninfo: L{objects.Node}
1721     @param ninfo: the node to check
1722     @param nresult: the remote results for the node
1723     @param nvinfo_starttime: the start time of the RPC call
1724     @param nvinfo_endtime: the end time of the RPC call
1725
1726     """
1727     node = ninfo.name
1728     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1729
1730     ntime = nresult.get(constants.NV_TIME, None)
1731     try:
1732       ntime_merged = utils.MergeTime(ntime)
1733     except (ValueError, TypeError):
1734       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1735       return
1736
1737     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1738       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1739     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1740       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1741     else:
1742       ntime_diff = None
1743
1744     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1745              "Node time diverges by at least %s from master node time",
1746              ntime_diff)
1747
1748   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1749     """Check the node LVM results.
1750
1751     @type ninfo: L{objects.Node}
1752     @param ninfo: the node to check
1753     @param nresult: the remote results for the node
1754     @param vg_name: the configured VG name
1755
1756     """
1757     if vg_name is None:
1758       return
1759
1760     node = ninfo.name
1761     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1762
1763     # checks vg existence and size > 20G
1764     vglist = nresult.get(constants.NV_VGLIST, None)
1765     test = not vglist
1766     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1767     if not test:
1768       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1769                                             constants.MIN_VG_SIZE)
1770       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1771
1772     # check pv names
1773     pvlist = nresult.get(constants.NV_PVLIST, None)
1774     test = pvlist is None
1775     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1776     if not test:
1777       # check that ':' is not present in PV names, since it's a
1778       # special character for lvcreate (denotes the range of PEs to
1779       # use on the PV)
1780       for _, pvname, owner_vg in pvlist:
1781         test = ":" in pvname
1782         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1783                  " '%s' of VG '%s'", pvname, owner_vg)
1784
1785   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1786     """Check the node bridges.
1787
1788     @type ninfo: L{objects.Node}
1789     @param ninfo: the node to check
1790     @param nresult: the remote results for the node
1791     @param bridges: the expected list of bridges
1792
1793     """
1794     if not bridges:
1795       return
1796
1797     node = ninfo.name
1798     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1799
1800     missing = nresult.get(constants.NV_BRIDGES, None)
1801     test = not isinstance(missing, list)
1802     _ErrorIf(test, self.ENODENET, node,
1803              "did not return valid bridge information")
1804     if not test:
1805       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1806                utils.CommaJoin(sorted(missing)))
1807
1808   def _VerifyNodeNetwork(self, ninfo, nresult):
1809     """Check the node network connectivity results.
1810
1811     @type ninfo: L{objects.Node}
1812     @param ninfo: the node to check
1813     @param nresult: the remote results for the node
1814
1815     """
1816     node = ninfo.name
1817     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1818
1819     test = constants.NV_NODELIST not in nresult
1820     _ErrorIf(test, self.ENODESSH, node,
1821              "node hasn't returned node ssh connectivity data")
1822     if not test:
1823       if nresult[constants.NV_NODELIST]:
1824         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1825           _ErrorIf(True, self.ENODESSH, node,
1826                    "ssh communication with node '%s': %s", a_node, a_msg)
1827
1828     test = constants.NV_NODENETTEST not in nresult
1829     _ErrorIf(test, self.ENODENET, node,
1830              "node hasn't returned node tcp connectivity data")
1831     if not test:
1832       if nresult[constants.NV_NODENETTEST]:
1833         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1834         for anode in nlist:
1835           _ErrorIf(True, self.ENODENET, node,
1836                    "tcp communication with node '%s': %s",
1837                    anode, nresult[constants.NV_NODENETTEST][anode])
1838
1839     test = constants.NV_MASTERIP not in nresult
1840     _ErrorIf(test, self.ENODENET, node,
1841              "node hasn't returned node master IP reachability data")
1842     if not test:
1843       if not nresult[constants.NV_MASTERIP]:
1844         if node == self.master_node:
1845           msg = "the master node cannot reach the master IP (not configured?)"
1846         else:
1847           msg = "cannot reach the master IP"
1848         _ErrorIf(True, self.ENODENET, node, msg)
1849
1850   def _VerifyInstance(self, instance, instanceconfig, node_image,
1851                       diskstatus):
1852     """Verify an instance.
1853
1854     This function checks to see if the required block devices are
1855     available on the instance's node.
1856
1857     """
1858     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1859     node_current = instanceconfig.primary_node
1860
1861     node_vol_should = {}
1862     instanceconfig.MapLVsByNode(node_vol_should)
1863
1864     for node in node_vol_should:
1865       n_img = node_image[node]
1866       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1867         # ignore missing volumes on offline or broken nodes
1868         continue
1869       for volume in node_vol_should[node]:
1870         test = volume not in n_img.volumes
1871         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1872                  "volume %s missing on node %s", volume, node)
1873
1874     if instanceconfig.admin_up:
1875       pri_img = node_image[node_current]
1876       test = instance not in pri_img.instances and not pri_img.offline
1877       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1878                "instance not running on its primary node %s",
1879                node_current)
1880
1881     diskdata = [(nname, success, status, idx)
1882                 for (nname, disks) in diskstatus.items()
1883                 for idx, (success, status) in enumerate(disks)]
1884
1885     for nname, success, bdev_status, idx in diskdata:
1886       # the 'ghost node' construction in Exec() ensures that we have a
1887       # node here
1888       snode = node_image[nname]
1889       bad_snode = snode.ghost or snode.offline
1890       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1891                self.EINSTANCEFAULTYDISK, instance,
1892                "couldn't retrieve status for disk/%s on %s: %s",
1893                idx, nname, bdev_status)
1894       _ErrorIf((instanceconfig.admin_up and success and
1895                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1896                self.EINSTANCEFAULTYDISK, instance,
1897                "disk/%s on %s is faulty", idx, nname)
1898
1899   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1900     """Verify if there are any unknown volumes in the cluster.
1901
1902     The .os, .swap and backup volumes are ignored. All other volumes are
1903     reported as unknown.
1904
1905     @type reserved: L{ganeti.utils.FieldSet}
1906     @param reserved: a FieldSet of reserved volume names
1907
1908     """
1909     for node, n_img in node_image.items():
1910       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1911         # skip non-healthy nodes
1912         continue
1913       for volume in n_img.volumes:
1914         test = ((node not in node_vol_should or
1915                 volume not in node_vol_should[node]) and
1916                 not reserved.Matches(volume))
1917         self._ErrorIf(test, self.ENODEORPHANLV, node,
1918                       "volume %s is unknown", volume)
1919
1920   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1921     """Verify N+1 Memory Resilience.
1922
1923     Check that if one single node dies we can still start all the
1924     instances it was primary for.
1925
1926     """
1927     cluster_info = self.cfg.GetClusterInfo()
1928     for node, n_img in node_image.items():
1929       # This code checks that every node which is now listed as
1930       # secondary has enough memory to host all instances it is
1931       # supposed to should a single other node in the cluster fail.
1932       # FIXME: not ready for failover to an arbitrary node
1933       # FIXME: does not support file-backed instances
1934       # WARNING: we currently take into account down instances as well
1935       # as up ones, considering that even if they're down someone
1936       # might want to start them even in the event of a node failure.
1937       if n_img.offline:
1938         # we're skipping offline nodes from the N+1 warning, since
1939         # most likely we don't have good memory infromation from them;
1940         # we already list instances living on such nodes, and that's
1941         # enough warning
1942         continue
1943       for prinode, instances in n_img.sbp.items():
1944         needed_mem = 0
1945         for instance in instances:
1946           bep = cluster_info.FillBE(instance_cfg[instance])
1947           if bep[constants.BE_AUTO_BALANCE]:
1948             needed_mem += bep[constants.BE_MEMORY]
1949         test = n_img.mfree < needed_mem
1950         self._ErrorIf(test, self.ENODEN1, node,
1951                       "not enough memory to accomodate instance failovers"
1952                       " should node %s fail (%dMiB needed, %dMiB available)",
1953                       prinode, needed_mem, n_img.mfree)
1954
1955   @classmethod
1956   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1957                    (files_all, files_all_opt, files_mc, files_vm)):
1958     """Verifies file checksums collected from all nodes.
1959
1960     @param errorif: Callback for reporting errors
1961     @param nodeinfo: List of L{objects.Node} objects
1962     @param master_node: Name of master node
1963     @param all_nvinfo: RPC results
1964
1965     """
1966     node_names = frozenset(node.name for node in nodeinfo)
1967
1968     assert master_node in node_names
1969     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1970             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1971            "Found file listed in more than one file list"
1972
1973     # Define functions determining which nodes to consider for a file
1974     file2nodefn = dict([(filename, fn)
1975       for (files, fn) in [(files_all, None),
1976                           (files_all_opt, None),
1977                           (files_mc, lambda node: (node.master_candidate or
1978                                                    node.name == master_node)),
1979                           (files_vm, lambda node: node.vm_capable)]
1980       for filename in files])
1981
1982     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1983
1984     for node in nodeinfo:
1985       nresult = all_nvinfo[node.name]
1986
1987       if nresult.fail_msg or not nresult.payload:
1988         node_files = None
1989       else:
1990         node_files = nresult.payload.get(constants.NV_FILELIST, None)
1991
1992       test = not (node_files and isinstance(node_files, dict))
1993       errorif(test, cls.ENODEFILECHECK, node.name,
1994               "Node did not return file checksum data")
1995       if test:
1996         continue
1997
1998       for (filename, checksum) in node_files.items():
1999         # Check if the file should be considered for a node
2000         fn = file2nodefn[filename]
2001         if fn is None or fn(node):
2002           fileinfo[filename].setdefault(checksum, set()).add(node.name)
2003
2004     for (filename, checksums) in fileinfo.items():
2005       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2006
2007       # Nodes having the file
2008       with_file = frozenset(node_name
2009                             for nodes in fileinfo[filename].values()
2010                             for node_name in nodes)
2011
2012       # Nodes missing file
2013       missing_file = node_names - with_file
2014
2015       if filename in files_all_opt:
2016         # All or no nodes
2017         errorif(missing_file and missing_file != node_names,
2018                 cls.ECLUSTERFILECHECK, None,
2019                 "File %s is optional, but it must exist on all or no nodes (not"
2020                 " found on %s)",
2021                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2022       else:
2023         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2024                 "File %s is missing from node(s) %s", filename,
2025                 utils.CommaJoin(utils.NiceSort(missing_file)))
2026
2027       # See if there are multiple versions of the file
2028       test = len(checksums) > 1
2029       if test:
2030         variants = ["variant %s on %s" %
2031                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2032                     for (idx, (checksum, nodes)) in
2033                       enumerate(sorted(checksums.items()))]
2034       else:
2035         variants = []
2036
2037       errorif(test, cls.ECLUSTERFILECHECK, None,
2038               "File %s found with %s different checksums (%s)",
2039               filename, len(checksums), "; ".join(variants))
2040
2041   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2042                       drbd_map):
2043     """Verifies and the node DRBD status.
2044
2045     @type ninfo: L{objects.Node}
2046     @param ninfo: the node to check
2047     @param nresult: the remote results for the node
2048     @param instanceinfo: the dict of instances
2049     @param drbd_helper: the configured DRBD usermode helper
2050     @param drbd_map: the DRBD map as returned by
2051         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2052
2053     """
2054     node = ninfo.name
2055     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2056
2057     if drbd_helper:
2058       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2059       test = (helper_result == None)
2060       _ErrorIf(test, self.ENODEDRBDHELPER, node,
2061                "no drbd usermode helper returned")
2062       if helper_result:
2063         status, payload = helper_result
2064         test = not status
2065         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2066                  "drbd usermode helper check unsuccessful: %s", payload)
2067         test = status and (payload != drbd_helper)
2068         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2069                  "wrong drbd usermode helper: %s", payload)
2070
2071     # compute the DRBD minors
2072     node_drbd = {}
2073     for minor, instance in drbd_map[node].items():
2074       test = instance not in instanceinfo
2075       _ErrorIf(test, self.ECLUSTERCFG, None,
2076                "ghost instance '%s' in temporary DRBD map", instance)
2077         # ghost instance should not be running, but otherwise we
2078         # don't give double warnings (both ghost instance and
2079         # unallocated minor in use)
2080       if test:
2081         node_drbd[minor] = (instance, False)
2082       else:
2083         instance = instanceinfo[instance]
2084         node_drbd[minor] = (instance.name, instance.admin_up)
2085
2086     # and now check them
2087     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2088     test = not isinstance(used_minors, (tuple, list))
2089     _ErrorIf(test, self.ENODEDRBD, node,
2090              "cannot parse drbd status file: %s", str(used_minors))
2091     if test:
2092       # we cannot check drbd status
2093       return
2094
2095     for minor, (iname, must_exist) in node_drbd.items():
2096       test = minor not in used_minors and must_exist
2097       _ErrorIf(test, self.ENODEDRBD, node,
2098                "drbd minor %d of instance %s is not active", minor, iname)
2099     for minor in used_minors:
2100       test = minor not in node_drbd
2101       _ErrorIf(test, self.ENODEDRBD, node,
2102                "unallocated drbd minor %d is in use", minor)
2103
2104   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2105     """Builds the node OS structures.
2106
2107     @type ninfo: L{objects.Node}
2108     @param ninfo: the node to check
2109     @param nresult: the remote results for the node
2110     @param nimg: the node image object
2111
2112     """
2113     node = ninfo.name
2114     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2115
2116     remote_os = nresult.get(constants.NV_OSLIST, None)
2117     test = (not isinstance(remote_os, list) or
2118             not compat.all(isinstance(v, list) and len(v) == 7
2119                            for v in remote_os))
2120
2121     _ErrorIf(test, self.ENODEOS, node,
2122              "node hasn't returned valid OS data")
2123
2124     nimg.os_fail = test
2125
2126     if test:
2127       return
2128
2129     os_dict = {}
2130
2131     for (name, os_path, status, diagnose,
2132          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2133
2134       if name not in os_dict:
2135         os_dict[name] = []
2136
2137       # parameters is a list of lists instead of list of tuples due to
2138       # JSON lacking a real tuple type, fix it:
2139       parameters = [tuple(v) for v in parameters]
2140       os_dict[name].append((os_path, status, diagnose,
2141                             set(variants), set(parameters), set(api_ver)))
2142
2143     nimg.oslist = os_dict
2144
2145   def _VerifyNodeOS(self, ninfo, nimg, base):
2146     """Verifies the node OS list.
2147
2148     @type ninfo: L{objects.Node}
2149     @param ninfo: the node to check
2150     @param nimg: the node image object
2151     @param base: the 'template' node we match against (e.g. from the master)
2152
2153     """
2154     node = ninfo.name
2155     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2156
2157     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2158
2159     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2160     for os_name, os_data in nimg.oslist.items():
2161       assert os_data, "Empty OS status for OS %s?!" % os_name
2162       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2163       _ErrorIf(not f_status, self.ENODEOS, node,
2164                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2165       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2166                "OS '%s' has multiple entries (first one shadows the rest): %s",
2167                os_name, utils.CommaJoin([v[0] for v in os_data]))
2168       # this will catched in backend too
2169       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2170                and not f_var, self.ENODEOS, node,
2171                "OS %s with API at least %d does not declare any variant",
2172                os_name, constants.OS_API_V15)
2173       # comparisons with the 'base' image
2174       test = os_name not in base.oslist
2175       _ErrorIf(test, self.ENODEOS, node,
2176                "Extra OS %s not present on reference node (%s)",
2177                os_name, base.name)
2178       if test:
2179         continue
2180       assert base.oslist[os_name], "Base node has empty OS status?"
2181       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2182       if not b_status:
2183         # base OS is invalid, skipping
2184         continue
2185       for kind, a, b in [("API version", f_api, b_api),
2186                          ("variants list", f_var, b_var),
2187                          ("parameters", beautify_params(f_param),
2188                           beautify_params(b_param))]:
2189         _ErrorIf(a != b, self.ENODEOS, node,
2190                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2191                  kind, os_name, base.name,
2192                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2193
2194     # check any missing OSes
2195     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2196     _ErrorIf(missing, self.ENODEOS, node,
2197              "OSes present on reference node %s but missing on this node: %s",
2198              base.name, utils.CommaJoin(missing))
2199
2200   def _VerifyOob(self, ninfo, nresult):
2201     """Verifies out of band functionality of a node.
2202
2203     @type ninfo: L{objects.Node}
2204     @param ninfo: the node to check
2205     @param nresult: the remote results for the node
2206
2207     """
2208     node = ninfo.name
2209     # We just have to verify the paths on master and/or master candidates
2210     # as the oob helper is invoked on the master
2211     if ((ninfo.master_candidate or ninfo.master_capable) and
2212         constants.NV_OOB_PATHS in nresult):
2213       for path_result in nresult[constants.NV_OOB_PATHS]:
2214         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2215
2216   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2217     """Verifies and updates the node volume data.
2218
2219     This function will update a L{NodeImage}'s internal structures
2220     with data from the remote call.
2221
2222     @type ninfo: L{objects.Node}
2223     @param ninfo: the node to check
2224     @param nresult: the remote results for the node
2225     @param nimg: the node image object
2226     @param vg_name: the configured VG name
2227
2228     """
2229     node = ninfo.name
2230     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2231
2232     nimg.lvm_fail = True
2233     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2234     if vg_name is None:
2235       pass
2236     elif isinstance(lvdata, basestring):
2237       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2238                utils.SafeEncode(lvdata))
2239     elif not isinstance(lvdata, dict):
2240       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2241     else:
2242       nimg.volumes = lvdata
2243       nimg.lvm_fail = False
2244
2245   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2246     """Verifies and updates the node instance list.
2247
2248     If the listing was successful, then updates this node's instance
2249     list. Otherwise, it marks the RPC call as failed for the instance
2250     list key.
2251
2252     @type ninfo: L{objects.Node}
2253     @param ninfo: the node to check
2254     @param nresult: the remote results for the node
2255     @param nimg: the node image object
2256
2257     """
2258     idata = nresult.get(constants.NV_INSTANCELIST, None)
2259     test = not isinstance(idata, list)
2260     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2261                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2262     if test:
2263       nimg.hyp_fail = True
2264     else:
2265       nimg.instances = idata
2266
2267   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2268     """Verifies and computes a node information map
2269
2270     @type ninfo: L{objects.Node}
2271     @param ninfo: the node to check
2272     @param nresult: the remote results for the node
2273     @param nimg: the node image object
2274     @param vg_name: the configured VG name
2275
2276     """
2277     node = ninfo.name
2278     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2279
2280     # try to read free memory (from the hypervisor)
2281     hv_info = nresult.get(constants.NV_HVINFO, None)
2282     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2283     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2284     if not test:
2285       try:
2286         nimg.mfree = int(hv_info["memory_free"])
2287       except (ValueError, TypeError):
2288         _ErrorIf(True, self.ENODERPC, node,
2289                  "node returned invalid nodeinfo, check hypervisor")
2290
2291     # FIXME: devise a free space model for file based instances as well
2292     if vg_name is not None:
2293       test = (constants.NV_VGLIST not in nresult or
2294               vg_name not in nresult[constants.NV_VGLIST])
2295       _ErrorIf(test, self.ENODELVM, node,
2296                "node didn't return data for the volume group '%s'"
2297                " - it is either missing or broken", vg_name)
2298       if not test:
2299         try:
2300           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2301         except (ValueError, TypeError):
2302           _ErrorIf(True, self.ENODERPC, node,
2303                    "node returned invalid LVM info, check LVM status")
2304
2305   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2306     """Gets per-disk status information for all instances.
2307
2308     @type nodelist: list of strings
2309     @param nodelist: Node names
2310     @type node_image: dict of (name, L{objects.Node})
2311     @param node_image: Node objects
2312     @type instanceinfo: dict of (name, L{objects.Instance})
2313     @param instanceinfo: Instance objects
2314     @rtype: {instance: {node: [(succes, payload)]}}
2315     @return: a dictionary of per-instance dictionaries with nodes as
2316         keys and disk information as values; the disk information is a
2317         list of tuples (success, payload)
2318
2319     """
2320     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2321
2322     node_disks = {}
2323     node_disks_devonly = {}
2324     diskless_instances = set()
2325     diskless = constants.DT_DISKLESS
2326
2327     for nname in nodelist:
2328       node_instances = list(itertools.chain(node_image[nname].pinst,
2329                                             node_image[nname].sinst))
2330       diskless_instances.update(inst for inst in node_instances
2331                                 if instanceinfo[inst].disk_template == diskless)
2332       disks = [(inst, disk)
2333                for inst in node_instances
2334                for disk in instanceinfo[inst].disks]
2335
2336       if not disks:
2337         # No need to collect data
2338         continue
2339
2340       node_disks[nname] = disks
2341
2342       # Creating copies as SetDiskID below will modify the objects and that can
2343       # lead to incorrect data returned from nodes
2344       devonly = [dev.Copy() for (_, dev) in disks]
2345
2346       for dev in devonly:
2347         self.cfg.SetDiskID(dev, nname)
2348
2349       node_disks_devonly[nname] = devonly
2350
2351     assert len(node_disks) == len(node_disks_devonly)
2352
2353     # Collect data from all nodes with disks
2354     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2355                                                           node_disks_devonly)
2356
2357     assert len(result) == len(node_disks)
2358
2359     instdisk = {}
2360
2361     for (nname, nres) in result.items():
2362       disks = node_disks[nname]
2363
2364       if nres.offline:
2365         # No data from this node
2366         data = len(disks) * [(False, "node offline")]
2367       else:
2368         msg = nres.fail_msg
2369         _ErrorIf(msg, self.ENODERPC, nname,
2370                  "while getting disk information: %s", msg)
2371         if msg:
2372           # No data from this node
2373           data = len(disks) * [(False, msg)]
2374         else:
2375           data = []
2376           for idx, i in enumerate(nres.payload):
2377             if isinstance(i, (tuple, list)) and len(i) == 2:
2378               data.append(i)
2379             else:
2380               logging.warning("Invalid result from node %s, entry %d: %s",
2381                               nname, idx, i)
2382               data.append((False, "Invalid result from the remote node"))
2383
2384       for ((inst, _), status) in zip(disks, data):
2385         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2386
2387     # Add empty entries for diskless instances.
2388     for inst in diskless_instances:
2389       assert inst not in instdisk
2390       instdisk[inst] = {}
2391
2392     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2393                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2394                       compat.all(isinstance(s, (tuple, list)) and
2395                                  len(s) == 2 for s in statuses)
2396                       for inst, nnames in instdisk.items()
2397                       for nname, statuses in nnames.items())
2398     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2399
2400     return instdisk
2401
2402   def BuildHooksEnv(self):
2403     """Build hooks env.
2404
2405     Cluster-Verify hooks just ran in the post phase and their failure makes
2406     the output be logged in the verify output and the verification to fail.
2407
2408     """
2409     env = {
2410       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2411       }
2412
2413     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2414                for node in self.my_node_info.values())
2415
2416     return env
2417
2418   def BuildHooksNodes(self):
2419     """Build hooks nodes.
2420
2421     """
2422     assert self.my_node_names, ("Node list not gathered,"
2423       " has CheckPrereq been executed?")
2424     return ([], self.my_node_names)
2425
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    The method builds the parameters for the node_verify RPC, computes the
    expected per-node state (node images) from the configuration, queries
    the nodes and then runs the file, node, instance, orphan volume and
    (unless listed in C{self.op.skip_checks}) N+1 memory checks. Problems
    are reported through C{self._ErrorIf}; progress messages and the final
    notices are sent through C{feedback_fn}.

    @param feedback_fn: function used to send feedback back to the caller
    @return: C{not self.bad}; C{self.bad} is reset to False when this
        method starts

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    # Expected volumes, filled in through the instances' MapLVsByNode
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    # For every other group, keep the first online node in name order
    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    # Parameters of the node_verify RPC sent to the nodes of this group
    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    # The VG/LV/PV/DRBD listings are only requested when a volume group is
    # configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      # Nodes used by the instance but absent from node_image get a
      # placeholder image, flagged as ghost when the configuration
      # (all_node_info) does not know them
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      # sbp: for each secondary node, the instances it backs, keyed by the
      # name of their primary node
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    # The nodes in self.extra_lv_nodes are only asked for their LV list
    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      # Only the file list is requested from the additional nodes
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    # Image of the first node with os_fail unset; it is passed to
    # _VerifyNodeOS as reference for all the nodes checked after it
    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      # Offline nodes are only counted, none of the checks below is run
      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      # NOTE: because of the elif chain, a drained node which is also the
      # master or a master candidate is not counted in n_drained
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      # Without an RPC answer there is no payload to check for this node
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        # Exactly one of the two errors below is raised per instance:
        # known instances run on the wrong node, unknown ones are orphans
        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    # Add the volume data of the extra LV nodes to their node images
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      # Warn when the nodes of an internally mirrored instance are spread
      # over more than one node group
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    # Informational notices only; they do not influence the return value
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2795
2796   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2797     """Analyze the post-hooks' result
2798
2799     This method analyses the hook result, handles it, and sends some
2800     nicely-formatted feedback back to the user.
2801
2802     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2803         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2804     @param hooks_results: the results of the multi-node hooks rpc call
2805     @param feedback_fn: function used send feedback back to the caller
2806     @param lu_result: previous Exec result
2807     @return: the new Exec result, based on the previous result
2808         and hook results
2809
2810     """
2811     # We only really run POST phase hooks, and are only interested in
2812     # their results
2813     if phase == constants.HOOKS_PHASE_POST:
2814       # Used to change hooks' output to proper indentation
2815       feedback_fn("* Hooks Results")
2816       assert hooks_results, "invalid result from hooks"
2817
2818       for node_name in hooks_results:
2819         res = hooks_results[node_name]
2820         msg = res.fail_msg
2821         test = msg and not res.offline
2822         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2823                       "Communication failure in hooks execution: %s", msg)
2824         if res.offline or msg:
2825           # No need to investigate payload if node is offline or gave an error.
2826           # override manually lu_result here as _ErrorIf only
2827           # overrides self.bad
2828           lu_result = 1
2829           continue
2830         for script, hkr, output in res.payload:
2831           test = hkr == constants.HKR_FAIL
2832           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2833                         "Script %s failed, output:", script)
2834           if test:
2835             output = self._HOOKS_INDENT_RE.sub('      ', output)
2836             feedback_fn("%s" % output)
2837             lu_result = 0
2838
2839       return lu_result
2840
2841
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  Only the logical volumes of instances marked as up are looked at.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, shared at every level.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @param feedback_fn: feedback function (not used by this LU)
    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = self.cfg.GetAllInstancesInfo().values()

    # Map every (node, volume) pair of an instance marked up to its instance
    expected_lvs = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      for node, volumes in lvs_by_node.items():
        for vol in volumes:
          expected_lvs[(node, vol)] = inst

    if not expected_lvs:
      # nothing to look for, no need to contact the nodes
      return result

    for node, node_res in self.rpc.call_lv_list(nodes, []).items():
      if node_res.offline:
        continue
      err = node_res.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node, err)
        res_nodes[node] = err
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every LV reported by the node is removed from the expected set
        owner = expected_lvs.pop((node, lv_name), None)
        if lv_online or owner is None:
          continue
        if owner.name not in res_instances:
          res_instances.append(owner.name)

    # whatever was not reported by any node is a missing LV; group the
    # (node, volume) pairs by instance name
    for node_vol, owner in expected_lvs.items():
      res_missing.setdefault(owner.name, []).append(node_vol)

    return result
2908
2909
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  Sizes recorded in the configuration are compared with the ones reported
  by the instances' primary nodes and corrected when they differ.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the needed locks.

    With an explicit instance list only those instances are locked and the
    node locks are recalculated later; otherwise all nodes and instances
    are locked. All locks are shared.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if self.wanted_names is None or level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    names = self.wanted_names
    if names is None:
      # no explicit list was given: work on all the instances we own
      names = self.glm.list_owned(locking.LEVEL_INSTANCE)
      self.wanted_names = names

    self.wanted_instances = [self.cfg.GetInstanceInfo(iname)
                             for iname in names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of any disk in the chain of first children
        had to be raised

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: feedback function, passed on to the configuration
        updates
    @return: list of (instance name, disk index, new size) tuples, one for
        every correction made

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    disks_by_node = {}
    for inst in self.wanted_instances:
      entries = disks_by_node.setdefault(inst.primary_node, [])
      for disk_idx, dsk in enumerate(inst.disks):
        entries.append((inst, disk_idx, dsk))

    changed = []
    for node, entries in disks_by_node.items():
      # the query works on copies, so that setting the disk IDs does not
      # touch the configuration objects
      copies = [entry[2].Copy() for entry in entries]
      for dcopy in copies:
        self.cfg.SetDiskID(dcopy, node)

      result = self.rpc.call_blockdev_getsize(node, copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(entries), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((inst, disk_idx, dsk), size) in zip(entries, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", disk_idx, inst.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", disk_idx, inst.name)
          continue

        # presumably a conversion from bytes to the MiB used in the
        # configuration -- confirm against blockdev_getsize
        size = size >> 20
        if size != dsk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", disk_idx,
                       inst.name, dsk.size, size)
          dsk.size = size
          self.cfg.Update(inst, feedback_fn)
          changed.append((inst.name, disk_idx, size))

        if self._EnsureChildSizes(dsk):
          self.cfg.Update(inst, feedback_fn)
          changed.append((inst.name, disk_idx, dsk.size))

    return changed
3022
3023
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the current cluster name and the requested one.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Returns the master node as first list and all nodes as second list.

    """
    master = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return ([master], all_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the operation is refused if neither the name nor
    the IP address changes, or if the new IP address already answers on
    the node daemon port.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()

    if new_ip == old_ip:
      if new_name == old_name:
        raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                   " cluster has changed",
                                   errors.ECODE_INVAL)
    elif netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    # from here on the resolved name is used
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped while the configuration and the known hosts
    file are updated, and started again afterwards even if the update
    failed.

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # the file is uploaded to all online nodes except the master
      targets = self.cfg.GetOnlineNodeList()
      if master in targets:
        targets.remove(master)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      start_err = start_result.fail_msg
      if start_err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", start_err)

    return new_name
3103
3104
3105 class LUClusterSetParams(LogicalUnit):
3106   """Change the parameters of the cluster.
3107
3108   """
3109   HPATH = "cluster-modify"
3110   HTYPE = constants.HTYPE_CLUSTER
3111   REQ_BGL = False
3112
3113   def CheckArguments(self):
3114     """Check parameters
3115
3116     """
3117     if self.op.uid_pool:
3118       uidpool.CheckUidPool(self.op.uid_pool)
3119
3120     if self.op.add_uids:
3121       uidpool.CheckUidPool(self.op.add_uids)
3122
3123     if self.op.remove_uids:
3124       uidpool.CheckUidPool(self.op.remove_uids)
3125
3126   def ExpandNames(self):
3127     # FIXME: in the future maybe other cluster params won't require checking on
3128     # all nodes to be modified.
3129     self.needed_locks = {
3130       locking.LEVEL_NODE: locking.ALL_SET,
3131     }
3132     self.share_locks[locking.LEVEL_NODE] = 1
3133
3134   def BuildHooksEnv(self):
3135     """Build hooks env.
3136
3137     """
3138     return {
3139       "OP_TARGET": self.cfg.GetClusterName(),
3140       "NEW_VG_NAME": self.op.vg_name,
3141       }
3142
3143   def BuildHooksNodes(self):
3144     """Build hooks nodes.
3145
3146     """
3147     mn = self.cfg.GetMasterNode()
3148     return ([mn], [mn])
3149
3150   def CheckPrereq(self):
3151     """Check prerequisites.
3152
3153     This checks whether the given params don't conflict and
3154     if the given volume group is valid.
3155
3156     """
3157     if self.op.vg_name is not None and not self.op.vg_name:
3158       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3159         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3160                                    " instances exist", errors.ECODE_INVAL)
3161
3162     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3163       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3164         raise errors.OpPrereqError("Cannot disable drbd helper while"
3165                                    " drbd-based instances exist",
3166                                    errors.ECODE_INVAL)
3167
3168     node_list = self.glm.list_owned(locking.LEVEL_NODE)
3169
3170     # if vg_name not None, checks given volume group on all nodes
3171     if self.op.vg_name:
3172       vglist = self.rpc.call_vg_list(node_list)
3173       for node in node_list:
3174         msg = vglist[node].fail_msg
3175         if msg:
3176           # ignoring down node
3177           self.LogWarning("Error while gathering data on node %s"
3178                           " (ignoring node): %s", node, msg)
3179           continue
3180         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3181                                               self.op.vg_name,
3182                                               constants.MIN_VG_SIZE)
3183         if vgstatus:
3184           raise errors.OpPrereqError("Error on node '%s': %s" %
3185                                      (node, vgstatus), errors.ECODE_ENVIRON)
3186
3187     if self.op.drbd_helper:
3188       # checks given drbd helper on all nodes
3189       helpers = self.rpc.call_drbd_helper(node_list)
3190       for node in node_list:
3191         ninfo = self.cfg.GetNodeInfo(node)
3192         if ninfo.offline:
3193           self.LogInfo("Not checking drbd helper on offline node %s", node)
3194           continue
3195         msg = helpers[node].fail_msg
3196         if msg:
3197           raise errors.OpPrereqError("Error checking drbd helper on node"
3198                                      " '%s': %s" % (node, msg),
3199                                      errors.ECODE_ENVIRON)
3200         node_helper = helpers[node].payload
3201         if node_helper != self.op.drbd_helper:
3202           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3203                                      (node, node_helper), errors.ECODE_ENVIRON)
3204
3205     self.cluster = cluster = self.cfg.GetClusterInfo()
3206     # validate params changes
3207     if self.op.beparams:
3208       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3209       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3210
3211     if self.op.ndparams:
3212       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3213       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3214
3215       # TODO: we need a more general way to handle resetting
3216       # cluster-level parameters to default values
3217       if self.new_ndparams["oob_program"] == "":
3218         self.new_ndparams["oob_program"] = \
3219             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3220
3221     if self.op.nicparams:
3222       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3223       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3224       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3225       nic_errors = []
3226
3227       # check all instances for consistency
3228       for instance in self.cfg.GetAllInstancesInfo().values():
3229         for nic_idx, nic in enumerate(instance.nics):
3230           params_copy = copy.deepcopy(nic.nicparams)
3231           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3232
3233           # check parameter syntax
3234           try:
3235             objects.NIC.CheckParameterSyntax(params_filled)
3236           except errors.ConfigurationError, err:
3237             nic_errors.append("Instance %s, nic/%d: %s" %
3238                               (instance.name, nic_idx, err))
3239
3240           # if we're moving instances to routed, check that they have an ip
3241           target_mode = params_filled[constants.NIC_MODE]
3242           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3243             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3244                               " address" % (instance.name, nic_idx))
3245       if nic_errors:
3246         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3247                                    "\n".join(nic_errors))
3248
3249     # hypervisor list/parameters
3250     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3251     if self.op.hvparams:
3252       for hv_name, hv_dict in self.op.hvparams.items():
3253         if hv_name not in self.new_hvparams:
3254           self.new_hvparams[hv_name] = hv_dict
3255         else:
3256           self.new_hvparams[hv_name].update(hv_dict)
3257
3258     # os hypervisor parameters
3259     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3260     if self.op.os_hvp:
3261       for os_name, hvs in self.op.os_hvp.items():
3262         if os_name not in self.new_os_hvp:
3263           self.new_os_hvp[os_name] = hvs
3264         else:
3265           for hv_name, hv_dict in hvs.items():
3266             if hv_name not in self.new_os_hvp[os_name]:
3267               self.new_os_hvp[os_name][hv_name] = hv_dict
3268             else:
3269               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3270
3271     # os parameters
3272     self.new_osp = objects.FillDict(cluster.osparams, {})
3273     if self.op.osparams:
3274       for os_name, osp in self.op.osparams.items():
3275         if os_name not in self.new_osp:
3276           self.new_osp[os_name] = {}
3277
3278         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3279                                                   use_none=True)
3280
3281         if not self.new_osp[os_name]:
3282           # we removed all parameters
3283           del self.new_osp[os_name]
3284         else:
3285           # check the parameter validity (remote check)
3286           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3287                          os_name, self.new_osp[os_name])
3288
3289     # changes to the hypervisor list
3290     if self.op.enabled_hypervisors is not None:
3291       self.hv_list = self.op.enabled_hypervisors
3292       for hv in self.hv_list:
3293         # if the hypervisor doesn't already exist in the cluster
3294         # hvparams, we initialize it to empty, and then (in both
3295         # cases) we make sure to fill the defaults, as we might not
3296         # have a complete defaults list if the hypervisor wasn't
3297         # enabled before
3298         if hv not in new_hvp:
3299           new_hvp[hv] = {}
3300         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3301         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3302     else:
3303       self.hv_list = cluster.enabled_hypervisors
3304
3305     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3306       # either the enabled list has changed, or the parameters have, validate
3307       for hv_name, hv_params in self.new_hvparams.items():
3308         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3309             (self.op.enabled_hypervisors and
3310              hv_name in self.op.enabled_hypervisors)):
3311           # either this is a new hypervisor, or its parameters have changed
3312           hv_class = hypervisor.GetHypervisor(hv_name)
3313           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3314           hv_class.CheckParameterSyntax(hv_params)
3315           _CheckHVParams(self, node_list, hv_name, hv_params)
3316
3317     if self.op.os_hvp:
3318       # no need to check any newly-enabled hypervisors, since the
3319       # defaults have already been checked in the above code-block
3320       for os_name, os_hvp in self.new_os_hvp.items():
3321         for hv_name, hv_params in os_hvp.items():
3322           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3323           # we need to fill in the new os_hvp on top of the actual hv_p
3324           cluster_defaults = self.new_hvparams.get(hv_name, {})
3325           new_osp = objects.FillDict(cluster_defaults, hv_params)
3326           hv_class = hypervisor.GetHypervisor(hv_name)
3327           hv_class.CheckParameterSyntax(new_osp)
3328           _CheckHVParams(self, node_list, hv_name, new_osp)
3329
3330     if self.op.default_iallocator:
3331       alloc_script = utils.FindFile(self.op.default_iallocator,
3332                                     constants.IALLOCATOR_SEARCH_PATH,
3333                                     os.path.isfile)
3334       if alloc_script is None:
3335         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3336                                    " specified" % self.op.default_iallocator,
3337                                    errors.ECODE_INVAL)
3338
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies every requested modification from the opcode to the cluster
    object (using the C{new_*} values prepared in L{CheckPrereq} where
    applicable) and then saves it through C{self.cfg.Update}. If the
    master network device is changed, the master IP is stopped before
    the configuration is saved and started again afterwards.

    @param feedback_fn: callable invoked with human-readable progress
        messages

    """
    # Volume group: an empty value means "no volume group" and is stored
    # as None
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    # DRBD helper: same convention, an empty value is stored as None
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    # Parameter dictionaries computed by CheckPrereq
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      # CheckPrereq filled in defaults for the enabled hypervisors, so the
      # hypervisor parameters are stored together with the new list
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    # Simple scalar/list settings: only touched when given in the opcode
    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      # Applies a list of (action, OS name) modifications in place to the
      # cluster attribute called "aname"; "desc" is only used in messages.
      # Adding an already-present or removing an absent OS is reported
      # through feedback_fn and otherwise ignored.
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    # The master IP is stopped before the new device name is recorded;
    # failure to stop it aborts the operation via result.Raise
    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    # Save all changes made to the cluster object above
    self.cfg.Update(self.cluster, feedback_fn)

    # "master" was bound in the block above, which is guarded by the same
    # condition; a failure to restart the IP is only logged as a warning
    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
3446
3447
def _UploadHelper(lu, nodes, fname):
  """Upload a file to a set of nodes, warning about failed copies.

  Nothing is done if the file does not exist locally. Per-node failures
  are reported as warnings and never abort the upload.

  @param lu: calling logical unit, used for the RPC call and for logging
  @param nodes: names of the nodes to upload to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for (node_name, node_result) in upload_results.items():
    error = node_result.fail_msg
    if not error:
      continue
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, error))
3460
3461
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts} and
      C{enabled_hypervisors} attributes are read
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of file names: files for all nodes, files which must
      either exist on all nodes or on none, files for master candidates
      only and files for VM-capable nodes only

  """
  # Files needed on every node
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  # Files restricted to master candidates
  files_mc = set()

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
    files_mc.add(constants.CLUSTER_CONF_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must be present either everywhere or nowhere
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files only needed on VM-capable nodes, as reported by each enabled
  # hypervisor
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    files_vm.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # A file name may appear in one category only
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          (len(files_all) + len(files_all_opt) +
           len(files_mc) + len(files_vm))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)
3504
3505
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Collect the nodes to upload to
  cluster = cfg.GetClusterInfo()
  master_info = cfg.GetNodeInfo(cfg.GetMasterNode())

  online_nodes = cfg.GetOnlineNodeList()
  vm_nodes = cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # The master node is never a target
  master_name = master_info.name
  for targets in (online_nodes, vm_nodes):
    if master_name in targets:
      targets.remove(master_name)

  # Collect the files to upload
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # The configuration file is never re-distributed from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload each group of files to its target nodes
  uploads = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]
  for (targets, file_names) in uploads:
    for file_name in file_names:
      _UploadHelper(lu, targets, file_name)
3555
3556
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks in shared mode.

    """
    node_level = locking.LEVEL_NODE
    self.needed_locks = {
      node_level: locking.ALL_SET,
    }
    self.share_locks[node_level] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    Re-saves the cluster object through the configuration writer and
    then uploads the ancillary files.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
3577
3578
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary
  node until no disk reports a sync in progress any more (or only once
  if C{oneshot} is set).

  @param lu: calling logical unit, used for RPC, configuration access
      and logging
  @param instance: the instance whose disks should be checked
  @param disks: disks to check; C{None} is passed on to
      L{_ExpandCheckDisks}, an empty list means there is nothing to wait
      for
  @type oneshot: boolean
  @param oneshot: whether to query the status only once instead of waiting
      for the sync to finish
  @rtype: boolean
  @return: True if no disk was found degraded (without a sync in progress)
      in the last poll, False otherwise
  @raise errors.RemoteError: if the node could not be contacted ten times
      in a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    # Longest remaining time estimated by any disk in this poll
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # A successful call resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # A disk that is degraded while syncing is expected; only degraded
      # disks without a sync in progress are a problem
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # Keep the largest estimate instead of the one of whichever
          # disk happens to be listed last
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # Wait at most a minute, but always at least one second: without the
    # lower bound a sync with no (or a zero) time estimate would make this
    # loop query the node without any pause
    time.sleep(min(60, max(1, max_time)))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
3652
3653
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @param lu: calling logical unit, used for RPC, configuration access
      and logging
  @param dev: the disk to check; its children are checked recursively
  @param node: name of the node on which to check the disk
  @param on_primary: whether the node is the primary one; on other nodes
      the device is only queried if C{dev.AssembleOnSecondary()} is true
  @param ldisk: whether to test the local storage status instead of the
      degraded flag (applies to C{dev} itself only)
  @rtype: boolean
  @return: True if the disk and all of its children passed the check

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    find_result = lu.rpc.call_blockdev_find(node, dev)
    failure = find_result.fail_msg
    if failure:
      lu.LogWarning("Can't find disk on node %s: %s", node, failure)
      consistent = False
    elif not find_result.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = find_result.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not find_result.payload.is_degraded

  # Children are only looked at as long as everything was fine so far
  for child in dev.children or []:
    if not consistent:
      break
    consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
3686
3687
3688 class LUOobCommand(NoHooksLU):
3689   """Logical unit for OOB handling.
3690
3691   """
3692   REG_BGL = False
3693   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3694
3695   def ExpandNames(self):
3696     """Gather locks we need.
3697
3698     """
3699     if self.op.node_names:
3700       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3701       lock_names = self.op.node_names
3702     else:
3703       lock_names = locking.ALL_SET
3704
3705     self.needed_locks = {
3706       locking.LEVEL_NODE: lock_names,
3707       }
3708
3709   def CheckPrereq(self):
3710     """Check prerequisites.
3711
3712     This checks:
3713      - the node exists in the configuration
3714      - OOB is supported
3715
3716     Any errors are signaled by raising errors.OpPrereqError.
3717
3718     """
3719     self.nodes = []
3720     self.master_node = self.cfg.GetMasterNode()
3721
3722     assert self.op.power_delay >= 0.0
3723
3724     if self.op.node_names:
3725       if (self.op.command in self._SKIP_MASTER and
3726           self.master_node in self.op.node_names):
3727         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3728         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3729
3730         if master_oob_handler:
3731           additional_text = ("run '%s %s %s' if you want to operate on the"
3732                              " master regardless") % (master_oob_handler,
3733                                                       self.op.command,
3734                                                       self.master_node)
3735         else:
3736           additional_text = "it does not support out-of-band operations"
3737
3738         raise errors.OpPrereqError(("Operating on the master node %s is not"
3739                                     " allowed for %s; %s") %
3740                                    (self.master_node, self.op.command,
3741                                     additional_text), errors.ECODE_INVAL)
3742     else:
3743       self.op.node_names = self.cfg.GetNodeList()
3744       if self.op.command in self._SKIP_MASTER:
3745         self.op.node_names.remove(self.master_node)
3746
3747     if self.op.command in self._SKIP_MASTER:
3748       assert self.master_node not in self.op.node_names
3749
3750     for node_name in self.op.node_names:
3751       node = self.cfg.GetNodeInfo(node_name)
3752
3753       if node is None:
3754         raise errors.OpPrereqError("Node %s not found" % node_name,
3755                                    errors.ECODE_NOENT)
3756       else:
3757         self.nodes.append(node)
3758
3759       if (not self.op.ignore_status and
3760           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3761         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3762                                     " not marked offline") % node_name,
3763                                    errors.ECODE_STATE)
3764
3765   def Exec(self, feedback_fn):
3766     """Execute OOB and return result if we expect any.
3767
3768     """
3769     master_node = self.master_node
3770     ret = []
3771
3772     for idx, node in enumerate(utils.NiceSort(self.nodes,
3773                                               key=lambda node: node.name)):
3774       node_entry = [(constants.RS_NORMAL, node.name)]
3775       ret.append(node_entry)
3776
3777       oob_program = _SupportsOob(self.cfg, node)
3778
3779       if not oob_program:
3780         node_entry.append((constants.RS_UNAVAIL, None))
3781         continue
3782
3783       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3784                    self.op.command, oob_program, node.name)
3785       result = self.rpc.call_run_oob(master_node, oob_program,
3786                                      self.op.command, node.name,
3787                                      self.op.timeout)
3788
3789       if result.fail_msg:
3790         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3791                         node.name, result.fail_msg)
3792         node_entry.append((constants.RS_NODATA, None))
3793       else:
3794         try:
3795           self._CheckPayload(result)
3796         except errors.OpExecError, err:
3797           self.LogWarning("Payload returned by node '%s' is not valid: %s",
3798                           node.name, err)
3799           node_entry.append((constants.RS_NODATA, None))
3800         else:
3801           if self.op.command == constants.OOB_HEALTH:
3802             # For health we should log important events
3803             for item, status in result.payload:
3804               if status in [constants.OOB_STATUS_WARNING,
3805                             constants.OOB_STATUS_CRITICAL]:
3806                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3807                                 item, node.name, status)
3808
3809           if self.op.command == constants.OOB_POWER_ON:
3810             node.powered = True
3811           elif self.op.command == constants.OOB_POWER_OFF:
3812             node.powered = False
3813           elif self.op.command == constants.OOB_POWER_STATUS:
3814             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3815             if powered != node.powered:
3816               logging.warning(("Recorded power state (%s) of node '%s' does not"
3817                                " match actual power state (%s)"), node.powered,
3818                               node.name, powered)
3819
3820           # For configuration changing commands we should update the node
3821           if self.op.command in (constants.OOB_POWER_ON,
3822                                  constants.OOB_POWER_OFF):
3823             self.cfg.Update(node, feedback_fn)
3824
3825           node_entry.append((constants.RS_NORMAL, result.payload))
3826
3827           if (self.op.command == constants.OOB_POWER_ON and
3828               idx < len(self.nodes) - 1):
3829             time.sleep(self.op.power_delay)
3830
3831     return ret
3832
3833   def _CheckPayload(self, result):
3834     """Checks if the payload is valid.
3835
3836     @param result: RPC result
3837     @raises errors.OpExecError: If payload is not valid
3838
3839     """
3840     errs = []
3841     if self.op.command == constants.OOB_HEALTH:
3842       if not isinstance(result.payload, list):
3843         errs.append("command 'health' is expected to return a list but got %s" %
3844                     type(result.payload))
3845       else:
3846         for item, status in result.payload:
3847           if status not in constants.OOB_STATUSES:
3848             errs.append("health item '%s' has invalid status '%s'" %
3849                         (item, status))
3850
3851     if self.op.command == constants.OOB_POWER_STATUS:
3852       if not isinstance(result.payload, dict):
3853         errs.append("power-status is expected to return a dict but got %s" %
3854                     type(result.payload))
3855
3856     if self.op.command in [
3857         constants.OOB_POWER_ON,
3858         constants.OOB_POWER_OFF,
3859         constants.OOB_POWER_CYCLE,
3860         ]:
3861       if result.payload is not None:
3862         errs.append("%s is expected to not return payload but got '%s'" %
3863                     (self.op.command, result.payload))
3864
3865     if errs:
3866       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3867                                utils.CommaJoin(errs))
3868
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Prepare the query; no locks are requested.

    """
    # Node locks (all nodes, shared mode) have been removed temporarily
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing, as no locks are used.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of tuples of (path, status,
        diagnose, variants, parameters, api_versions) as values; every
        node whose RPC call did not fail has an entry for each OS,
        possibly an empty list

    """
    # Only nodes which answered the RPC get an entry, so that nodes with a
    # non-responding node daemon don't make all OSes invalid
    responsive = [nname for (nname, nres) in rlist.items()
                  if not nres.fail_msg]

    by_os = {}
    for (node_name, node_result) in rlist.items():
      if node_result.fail_msg or not node_result.payload:
        continue
      for entry in node_result.payload:
        (os_name, path, status, diagnose, variants,
         params, api_versions) = entry
        if os_name not in by_os:
          # start with an empty list for each responsive node
          by_os[os_name] = dict((nname, []) for nname in responsive)
        # parameters arrive as [name, help] and are stored as (name, help)
        param_tuples = [tuple(param) for param in params]
        by_os[os_name][node_name].append((path, status, diagnose, variants,
                                          param_tuples, api_versions))
    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    The OS list is requested from all nodes which are vm-capable and
    not offline; an OS is valid only if the first entry found on every
    such node is valid.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    candidates = [node.name
                  for node in lu.cfg.GetAllNodesInfo().values()
                  if not node.offline and node.vm_capable]
    per_os = self._DiagnoseByOS(lu.rpc.call_os_diagnose(candidates))
    cluster = lu.cfg.GetClusterInfo()

    collected = {}

    for (os_name, node_status) in per_os.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=node_status,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      first_node = True
      for os_list in node_status.values():
        info.valid = bool(info.valid and os_list and os_list[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = os_list[0][3:6]
        merges = [
          (variants, node_variants),
          (parameters, node_params),
          (api_versions, node_api),
          ]
        for (merged, values) in merges:
          if first_node:
            # the first node provides the initial values
            merged.update(values)
          else:
            # later nodes can only remove inconsistent values
            merged.intersection_update(values)
        first_node = False

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      collected[os_name] = info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, per_os.keys(), None)
    return [collected[name] for name in ordered_names
            if name in collected]
3984
3985
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the requested OS names
    @return: the name filter, the status filter, both combined with
        C{qlang.OP_AND}, or C{None} if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if not status_filter:
      return name_filter
    return [qlang.OP_AND, name_filter, status_filter]

  def CheckArguments(self):
    """Create the OS query object for the requested fields and names.

    """
    fields = self.op.output_fields
    qfilter = self._BuildFilter(fields, self.op.names)
    self.oq = _OsQuery(qfilter, fields, False)

  def ExpandNames(self):
    """Let the query object do the name expansion.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4028
4029
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a (pre, post) tuple; both entries are the list of all nodes
        without the node being removed

    """
    hook_nodes = self.cfg.GetNodeList()
    try:
      hook_nodes.remove(self.op.node_name)
    except ValueError:
      # Not fatal: the hooks simply run on every node we know about
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    expanded_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = expanded_name
    node = self.cfg.GetNodeInfo(expanded_name)
    assert node is not None

    all_instances = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    # Refuse removal as long as any instance uses the node in any role
    for iname in all_instances:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The candidate pool is adjusted first, then the node is dropped from
    the context, its post hooks are run and it is asked to leave the
    cluster. A failure of the leave-cluster call only produces a warning.

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote other nodes to master candidate if needed, never this one
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run the post hooks on the node before asking it to leave the cluster
    _RunPostHook(self, node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts and push the updated files out
    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node_name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
4123
4124
class _NodeQuery(_QueryBase):
  """Query implementation for nodes, using L{query.NODE_FIELDS}.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted nodes and declares the locks on the LU.

    Node locks are only requested if locking was asked for and live
    data (C{query.NQ_LIVE}) is part of the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # non-static fields were requested, hence the nodes must be locked
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Nothing to declare beyond what L{ExpandNames} already did.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data listed in C{self.requested_data} are
    gathered; the others are passed on as C{None} (an empty dict in the
    case of the node groups).

    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
    wanted_data = self.requested_data

    # Live data, only asked from vm_capable nodes; failed or empty
    # answers are left out
    live_data = None
    if query.NQ_LIVE in wanted_data:
      vm_nodes = [name for name in nodenames if all_info[name].vm_capable]
      node_data = lu.rpc.call_node_info(vm_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = {}
      for (name, nresult) in node_data.items():
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = nresult.payload

    # Instance names per node, split by role
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in wanted_data:
      node_to_primary = {}
      node_to_secondary = {}
      for name in nodenames:
        node_to_primary[name] = set()
        node_to_secondary[name] = set()

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # Out-of-band support, computed for every node in the configuration
    oob_support = None
    if query.NQ_OOB in wanted_data:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    groups = {}
    if query.NQ_GROUP in wanted_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    node_objects = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_objects, live_data,
                               lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
4199
4200
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is forwarded to a L{_NodeQuery} object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Lets the query object declare the needed locks.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
4217
4218
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are skipped silently; nodes for which the RPC call
    failed are skipped with a warning.

    @return: a list with one entry per volume, each entry being the list
        of the requested output fields converted to strings; nodes are
        visited in L{utils.NiceSort} order and the volumes of a node are
        ordered by their device
    @raise errors.ParameterError: if an unknown output field is found

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    # list_owned presumably gives no ordering guarantee; walk the nodes in
    # sorted order (as LUNodeQueryStorage does) so the result is stable
    for node in utils.NiceSort(nodenames):
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      # work on a copy, the RPC payload itself is left untouched
      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # first instance having this LV on this node, '-' if none has
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
4295
4296
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    Offline nodes are skipped silently; nodes for which the RPC call
    failed are skipped with a warning.

    @return: a list of rows (one per storage unit, ordered by node and
        then by unit name), each holding the requested output fields
    @raise errors.ParameterError: if an unknown output field is found

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # The name is always requested, as the rows are sorted by it
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Node and type are only known to the LU, never ask the backend
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    # Position of every field in the rows returned by the backend
    field_idx = {}
    for (idx, fname) in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows_by_name = {}
      for raw_row in nresult.payload:
        rows_by_name[raw_row[name_idx]] = raw_row

      for unit_name in utils.NiceSort(rows_by_name.keys()):
        raw_row = rows_by_name[unit_name]
        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(raw_row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
4378
4379
class _InstanceQuery(_QueryBase):
  """Query implementation for instances, using L{query.INSTANCE_FIELDS}.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and declares the locks on the LU.

    Locks are only requested if locking was asked for and live data
    (C{query.IQ_LIVE}) is part of the requested data; the node locks are
    then filled in later, by L{DeclareLocks}.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedInstances(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Locks the nodes of the locked instances, if locking is in use.

    """
    if self.do_locking and level == locking.LEVEL_NODE:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data listed in C{self.requested_data} are
    gathered.

    @return: a L{query.InstanceQueryData} object

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*[inst.all_nodes
                                        for inst in instance_list]))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
    live_data = {}

    # Live data is needed for the console information too
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for node_name in nodes:
        result = node_data[node_name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(node_name)
        if result.fail_msg:
          bad_nodes.append(node_name)
          continue
        if not result.payload:
          # no instance is alive on this node
          continue

        for iname in result.payload:
          if iname not in all_info:
            # orphan instance; we don't list it here as we don't
            # handle this case yet in the output of instance listing
            logging.warning("Orphan instance '%s' found on node %s",
                            iname, node_name)
          elif all_info[iname].primary_node == node_name:
            # NOTE(review): this merges the whole payload of the node, not
            # only the entry of this instance - confirm this is intended
            live_data.update(result.payload)
          else:
            wrongnode_inst.add(iname)

    disk_usage = None
    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = {}
      for inst in instance_list:
        disk_sizes = [{constants.IDISK_SIZE: disk.size}
                      for disk in inst.disks]
        disk_usage[inst.name] = _ComputeDiskSize(inst.disk_template,
                                                 disk_sizes)

    consinfo = None
    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name not in live_data:
          consinfo[inst.name] = None
        else:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
      assert set(consinfo.keys()) == set(instance_names)

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
4473
4474
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind (C{op.what}) selects the query implementation, to which all
  the work is forwarded.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    impl_class = _GetQueryImplementation(self.op.what)
    self.impl = impl_class(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    """Lets the query implementation declare what it needs.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards the lock declaration to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
4495
4496
class LUQueryFields(NoHooksLU):
  """Query for the fields of resources/items of a certain kind.

  The kind (C{op.what}) selects the query implementation whose C{FIELDS}
  are looked up.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of L{query.QueryFields} for the selected kind.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
4512
4513
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields or if a field that can't be modified is to be changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    fields_by_type = constants.MODIFIABLE_STORAGE_FIELDS

    try:
      modifiable = fields_by_type[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # Anything outside of the modifiable set is an error
    rejected = set(self.op.changes.keys()) - modifiable
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(rejected)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the node holding the storage unit.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    modify_result = self.rpc.call_storage_modify(self.op.node_name,
                                                 self.op.storage_type,
                                                 st_args, self.op.name,
                                                 self.op.changes)
    modify_result.Raise("Failed to modify storage unit '%s' on %s" %
                        (self.op.name, self.op.node_name))
4554
4555
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  This handles both adding a new node and re-adding (C{op.readd}) a node
  which is already part of the configuration.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolves the node name and rejects invalid re-add requests.

    """
    ip_family = self.cfg.GetPrimaryIPFamily()
    self.primary_ip_family = ip_family
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd:
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("Cannot readd the master node",
                                   errors.ECODE_STATE)
      if self.op.group:
        raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                   " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a (pre, post) tuple; the pre list holds all nodes except
        the added one, the post list additionally holds the added node

    """
    added = self.op.node_name
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([added]))
    post_nodes = pre_nodes + [added]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = hostname.ip
    self.op.primary_ip = primary_ip

    # Without an explicit secondary IP the primary one is reused, which
    # can't work if the primary address is an IPv6 one
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    # A new node must be unknown, a re-added one must be known
    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if self.op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # the secondary IP must stay the same, the primary one may change
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # neither of our addresses may be in use by another node
      used_ips = (existing_node.primary_ip, existing_node.secondary_ip)
      if primary_ip in used_ips or secondary_ip in used_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this block, None is no longer a valid value for the
    # _capable op attributes: unset flags are taken from the old node on
    # re-add and default to True otherwise
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
    for attr in self._NFLAGS:
      if getattr(self.op, attr) is not None:
        continue
      if self.op.readd:
        setattr(self.op, attr, getattr(old_node, attr))
      else:
        setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      (pri, sec) = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no secondary ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a secondary ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      secondary_ok = netutils.TcpPing(secondary_ip,
                                      constants.DEFAULT_NODED_PORT,
                                      source=myself.secondary_ip)
      if not secondary_ok:
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # a re-added node is passed as an exception to the promotion decision
    exceptions = []
    if self.op.readd:
      exceptions.append(node)

    self.master_candidate = False
    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    @raise errors.OpExecError: on a protocol version mismatch or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = False # pylint: disable-msg=W0201
      new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for flag in self._NFLAGS:
      setattr(new_node, flag, getattr(self.op, flag))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    new_node.ndparams = self.op.ndparams or {}

    # check connectivity
    version_result = self.rpc.call_version([node])[node]
    version_result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_result.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_result.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_result.payload)

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                    constants.ETC_HOSTS_ADD,
                                                    self.hostname.name,
                                                    self.hostname.ip)
      hosts_result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Have the master verify the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_results = self.rpc.call_node_verify(node_verify_list,
                                               node_verify_param,
                                               self.cfg.GetClusterName())
    for verifier in node_verify_list:
      verify_results[verifier].Raise("Cannot communicate with node %s" %
                                     verifier)
      nl_payload = verify_results[verifier].payload[constants.NV_NODELIST]
      if not nl_payload:
        continue
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed")

    if not self.op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_result = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = demote_result.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
4828
4829
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the opcode arguments and derive the locking strategy.

    Sets C{self.might_demote}, C{self.lock_all} and C{self.lock_instances},
    which are used by L{ExpandNames}, L{DeclareLocks} and L{CheckPrereq}.

    @raise errors.OpPrereqError: if no modification was requested, if more
        than one of the checked parameters is C{True}, or if the secondary
        IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    # A change of the power state alone is a valid request (CheckPrereq and
    # Exec both handle it), so it counts as a modification here; it is kept
    # out of all_mods so the "more than one state" check below is unaffected
    if all_mods.count(None) == len(all_mods) and self.op.powered is None:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declare the needed locks.

    All nodes are locked if C{self.lock_all} is set, otherwise only the
    target node; all instances are locked if C{self.lock_instances} is set.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Compute the affected instances and release unneeded instance locks.

    At node level, when instances were locked, this fills
    C{self.affected_instances} with the owned instances whose disk template
    is in C{constants.DTS_INT_MIRROR} and which use the target node.

    """
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_keep = []
      self.affected_instances = []

      # The list is built even when all nodes are locked, as CheckPrereq
      # relies on self.affected_instances for its secondary IP checks
      for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
        instance = self.context.cfg.GetInstanceInfo(instance_name)
        if (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes):
          instances_keep.append(instance_name)
          self.affected_instances.append(instance)

      # Instance locks are only narrowed down if not all nodes are locked
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested flag, power state, secondary IP and
    node parameter changes are allowed for the node, and computes
    C{self.old_flags}, C{self.old_role} and C{self.new_role} (plus
    C{self.new_ndparams} if node parameters are changed) for L{Exec}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # Report the instance names, not the instance objects
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" %
                                     ", ".join(inst.name for inst in
                                               self.affected_instances),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: feedback function, passed on to the configuration
        update
    @rtype: list of tuples
    @return: (parameter name, new value) pairs for the changed parameters

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
5126
5127
class LUNodePowercycle(NoHooksLU):
  """Logical unit requesting the powercycle of a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and refuse an unforced powercycle of the master.

    @raise errors.OpPrereqError: if the target is the master node and the
        force parameter is not set

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    targets_master = (node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are acquired: this is a last-resort operation, which must
    not wait for other jobs.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Asks the node to powercycle itself.

    @return: the payload of the powercycle RPC call

    """
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")

    payload = rpc_result.payload
    return payload
5158
5159
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Build the dictionary describing the cluster.

    @rtype: dict
    @return: software/protocol versions plus the cluster-level settings

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, restricted to the enabled hypervisors
    os_hvp = {}
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for (hv_name, hv_params) in hv_dict.items()
                             if hv_name in enabled_hvs)

    # The configuration stores an address family, callers get an IP version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)
    architecture = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": architecture,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
5227
5228
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning selected configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Validate the requested output fields against the known ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def _GetFieldValue(self, field):
    """Compute the value of a single output field.

    @param field: the name of the requested field
    @return: the value for the field
    @raise errors.ParameterError: if the field is not known

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    if field == "volume_group_name":
      return self.cfg.GetVGName()
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Collect the values of the requested fields.

    @rtype: list
    @return: one value per requested field, in the order of
        C{self.op.output_fields}

    """
    collected = []
    for name in self.op.output_fields:
      collected.append(self._GetFieldValue(name))
    return collected
5266
5267
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit activating the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and its primary node
    has to pass the L{_CheckNodeOnline} check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Assemble the instance's disks.

    @return: the device information computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
5305
5306
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Assemble the block devices of an instance.

  The devices are first assembled on all their nodes in secondary mode
  and then, in a second pass, on the primary node in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are assembled
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to assemble (all instance disks if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, failures during the first pass do
      not mark the operation as failed
  @type ignore_size: boolean
  @param ignore_size: if true, the assemble calls are made on a copy of
      the disk object with its size unset, which is useful when the
      recorded size is wrong
  @rtype: tuple
  @return: (disks_ok, device_info), where disks_ok is False if any relevant
      assemble call failed and device_info is a list of (primary node,
      instance-visible disk name, device path or None) tuples, one per disk

  """
  instance_name = instance.name
  selected = _ExpandCheckDisks(instance, disks)
  all_ok = True
  mappings = []

  # Doing the assembly in two passes narrows, but does not close, the
  # window in which DRBD could be switched to primary before the
  # handshake has taken place.

  # A proper fix would wait (with some limit) until the connection is
  # established and DRBD has left WFConnection for any network-connected
  # state (Connected, SyncTarget, SyncSource, etc.)

  # Pass 1: every node of every disk, in secondary mode
  for (disk_idx, top_disk) in enumerate(selected):
    for (node_name, child) in top_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        child = child.Copy()
        child.UnsetSize()
      lu.cfg.SetDiskID(child, node_name)
      rpc_result = lu.rpc.call_blockdev_assemble(node_name, child,
                                                 instance_name, False,
                                                 disk_idx)
      fail_msg = rpc_result.fail_msg
      if not fail_msg:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         top_disk.iv_name, node_name, fail_msg)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # Pass 2: only the primary node, in primary mode
  for (disk_idx, top_disk) in enumerate(selected):
    device_path = None

    for (node_name, child) in top_disk.ComputeNodeTree(instance.primary_node):
      if node_name == instance.primary_node:
        if ignore_size:
          child = child.Copy()
          child.UnsetSize()
        lu.cfg.SetDiskID(child, node_name)
        rpc_result = lu.rpc.call_blockdev_assemble(node_name, child,
                                                   instance_name, True,
                                                   disk_idx)
        fail_msg = rpc_result.fail_msg
        if fail_msg:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
                             " (is_primary=True, pass=2): %s",
                             top_disk.iv_name, node_name, fail_msg)
          all_ok = False
        else:
          device_path = rpc_result.payload

    mappings.append((instance.primary_node, top_disk.iv_name, device_path))

  # Leave the disk IDs set up for the primary node; a workaround which
  # better logical/physical id handling would make unnecessary
  for top_disk in selected:
    lu.cfg.SetDiskID(top_disk, instance.primary_node)

  return (all_ok, mappings)
5393
5394
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, shutting them down on failure.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to
      L{_AssembleInstanceDisks}; if it is false but not None, a hint
      about '--force' is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  # Assembly failed: undo it before reporting the error
  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
5408
5409
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Shut down the disks.

    With the force option the disks are shut down directly, otherwise
    via L{_SafeShutdownInstanceDisks}.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
5444
5445
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function calls L{_CheckInstanceDown} for the instance before
  calling L{_ShutdownInstanceDisks}. The result of the latter is not
  returned to the caller.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down, passed on unchanged to
      L{_ShutdownInstanceDisks}

  """
  # Presumably raises if the instance is not down -- confirm against
  # _CheckInstanceDown
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5455
5456
def _ExpandCheckDisks(instance, disks):
  """Compute the disks of an instance to act on.

  @param instance: the instance owning the disks
  @type disks: list of L{objects.Disk} or None
  @param disks: the selected disks, or None for all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: C{instance.disks} if no selection was given, otherwise the
      selection itself
  @raise errors.ProgrammerError: if a selected disk does not belong to
      the instance

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
5473
5474
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  The shutdown is attempted on all nodes of the selected disks.

  A failure on the primary node counts as an error unless ignore_primary
  is true; a failure on any other node counts as an error unless the RPC
  result is flagged as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down (all instance disks if None)
  @param ignore_primary: whether failures on the primary node are ignored
  @rtype: boolean
  @return: False if any failure counted as an error, True otherwise

  """
  success = True

  for disk in _ExpandCheckDisks(instance, disks):
    for (node, top_disk) in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      rpc_result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      fail_msg = rpc_result.fail_msg
      if not fail_msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, fail_msg)
      if node == instance.primary_node:
        if not ignore_primary:
          success = False
      elif not rpc_result.offline:
        success = False

  return success
5499
5500
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has enough free memory.

  The node is queried for its free memory; if the query fails, the answer
  is not an integer or the free memory is below the requested amount, an
  OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get('memory_free', None)
  if not isinstance(available, int):
    text = ("Can't compute free memory on node %s, result"
            " was '%s'" % (node, available))
    raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

  if requested > available:
    text = ("Not enough memory on node %s for %s:"
            " needed %s MiB, available %s MiB" %
            (node, reason, requested, available))
    raise errors.OpPrereqError(text, errors.ECODE_NORES)
5536
5537
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Verify that nodes have enough free disk space in all given VGs.

  For every volume group in req_sizes, L{_CheckNodesFreeDiskOnVG} is
  called with the corresponding amount; that function raises an
  OpPrereqError exception if a node lacks space or cannot be checked.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg_name in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, req_sizes[vg_name])
5559
5560
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify that nodes have enough free disk space in the given VG.

  All nodes are queried for the free space of the volume group; if a
  query fails, an answer is not an integer or the free space is below the
  requested amount, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, vg, None)

  for node_name in nodenames:
    node_result = results[node_name]
    node_result.Raise("Cannot get current information from node %s" %
                      node_name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    available = node_result.payload.get("vg_free", None)
    if not isinstance(available, int):
      text = ("Can't compute free disk space on node"
              " %s for vg %s, result was '%s'" %
              (node_name, vg, available))
      raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

    if requested > available:
      text = ("Not enough disk space on target node %s"
              " vg %s: required %d MiB, available %d MiB" %
              (node_name, vg, requested, available))
      raise errors.OpPrereqError(text, errors.ECODE_NORES)
5596
5597
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  The opcode can carry one-off hypervisor and backend parameter
  overrides (C{self.op.hvparams} and C{self.op.beparams}). They are
  checked in L{CheckArguments}/L{CheckPrereq} and handed to the start
  RPC in L{Exec}.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Enforce the declared types on the backend parameter overrides.

    """
    # extra beparams
    if self.op.beparams:
      # enforce the declared parameter types on the given overrides
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @rtype: dict
    @return: the "FORCE" flag merged with the per-instance hook
        environment

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list (master node plus all instance nodes) for
        both hook phases

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster. If hypervisor
    parameter overrides were given, they are merged over the filled
    instance parameters and verified. Unless the primary node is offline
    and the opcode asks to ignore that, the primary node must be online,
    the instance bridges are checked and, if the instance is not running
    yet, the primary node must have enough free memory for the memory
    size the instance is going to be started with.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      if self.op.beparams:
        # The one-off overrides are what the start RPC receives in Exec,
        # so the free memory check below has to use them as well;
        # otherwise a temporarily enlarged memory size would be checked
        # against the (smaller) configured value.
        bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is marked as up in the
    configuration before the start is attempted. If the primary node is
    offline nothing else is done; otherwise the disks are activated and
    the instance is started through RPC.

    @raise errors.OpExecError: if the start RPC fails; the instance
        disks are shut down again before raising

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      # CheckPrereq only lets an offline primary through with this flag
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active for an instance that did not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
5706
5707
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  A soft or hard reboot of a running instance is done with one reboot
  RPC call. In every other case the instance is shut down (if it was
  running) and started again.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The reboot options of the opcode are exported and then merged with
    the per-instance hook environment.

    """
    hook_env = {}
    hook_env["IGNORE_SECONDARIES"] = self.op.ignore_secondaries
    hook_env["REBOOT_TYPE"] = self.op.reboot_type
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout

    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both hook phases use the same list: the master node followed by all
    nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance, runs the online check on its primary
    node and the bridge existence check for the instance.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    The instance is marked as up in the configuration once the reboot
    (or the stop/start cycle) went through.

    """
    inst = self.instance
    secondaries_ignored = self.op.ignore_secondaries
    kind = self.op.reboot_type
    pnode = inst.primary_node

    state = self.rpc.call_instance_info(pnode, inst.name, inst.hypervisor)
    state.Raise("Error checking node %s" % pnode)
    is_running = bool(state.payload)

    # only a running instance can be rebooted "in place" by the node
    in_place = is_running and kind in (constants.INSTANCE_REBOOT_SOFT,
                                       constants.INSTANCE_REBOOT_HARD)

    if not in_place:
      # full reboot: stop (if needed) and start again
      if not is_running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     inst.name)
      else:
        stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                      self.op.shutdown_timeout)
        stop_result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, inst)

      _StartInstanceDisks(self, inst, secondaries_ignored)
      start_result = self.rpc.call_instance_start(pnode, inst, None, None)
      failure = start_result.fail_msg
      if failure:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)
    else:
      for dev in inst.disks:
        self.cfg.SetDiskID(dev, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, inst, kind,
                                                    self.op.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(inst.name)
5799
5800
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The per-instance hook environment is extended with the shutdown
    timeout of the opcode.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both hook phases use the same list: the master node followed by all
    nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance and records whether its primary node is
    marked offline. The online check on the primary node is skipped only
    when the node is offline and the opcode asks to ignore offline nodes.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode_info = self.cfg.GetNodeInfo(self.instance.primary_node)
    self.primary_offline = pnode_info.offline

    if not self.primary_offline or not self.op.ignore_offline_nodes:
      _CheckNodeOnline(self, self.instance.primary_node)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is first marked as down
    in the configuration. A failing shutdown RPC only produces a
    warning; the instance disks are shut down afterwards in any case.

    """
    inst = self.instance
    pnode = inst.primary_node
    shutdown_timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(inst.name)

    if self.primary_offline:
      # nothing can be done on the node itself
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, inst, shutdown_timeout)
    failure = result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
5868
5869
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the per-instance hook environment is exported.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both hook phases use the same list: the master node followed by all
    nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    In addition all instance nodes go through the online check, diskless
    instances are refused, a newly requested OS is verified on the
    primary node and new OS parameters, if given, are validated on all
    instance nodes.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in inst.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    if self.op.os_type is None:
      # keep the OS the instance already has
      target_os = inst.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      target_os = self.op.os_type

    all_nodes = list(inst.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      merged_osparams = _GetUpdatedParams(inst.osparams, self.op.osparams)
      _CheckOSParams(self, True, all_nodes, target_os, merged_osparams)
      self.os_inst = merged_osparams # the new dict (without defaults)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS is written to the configuration first. The OS create
    scripts are then run on the primary node with the instance disks
    activated; the disks are shut down again whether or not the
    installation succeeded.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      install_result = self.rpc.call_instance_os_add(instance.primary_node,
                                                     instance, True,
                                                     self.op.debug_level,
                                                     osparams=self.os_inst)
      failure_text = ("Could not install OS for instance %s on node %s" %
                      (instance.name, instance.primary_node))
      install_result.Raise(failure_text)
    finally:
      _ShutdownInstanceDisks(self, instance)
5959
5960
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  Optionally the disks can be recreated on a new set of nodes, given in
  C{self.op.nodes}; the first entry becomes the new primary node.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Normalise the disk index list: drop duplicates and sort it.

    """
    unique_indices = frozenset(self.op.disks)
    self.op.disks = sorted(unique_indices)

  def ExpandNames(self):
    """Expand the replacement node names and declare the locks.

    The replacement nodes (if any) are requested as node locks right
    away; the instance's own nodes are appended in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    node_locks = []
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, name)
                       for name in self.op.nodes]
      node_locks.extend(self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def DeclareLocks(self, level):
    """Add the instance's nodes to the node locks.

    """
    if level != locking.LEVEL_NODE:
      return
    # if we replace the nodes, we only need to lock the old primary,
    # otherwise we need to lock all nodes for disk re-creation
    self._LockInstancesNodes(primary_only=bool(self.op.nodes))

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the per-instance hook environment is exported.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both hook phases use the same list: the master node followed by all
    nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also verifies that the number of replacement nodes matches the
    current number of instance nodes, that the (new) primary node passes
    the online check, that the instance has disks, that the given disk
    indices exist and that a partial recreation is not combined with a
    change of nodes.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = inst.primary_node
    if self.op.nodes:
      current_count = len(inst.all_nodes)
      new_count = len(self.op.nodes)
      if new_count != current_count:
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (inst.name, current_count, new_count),
                                   errors.ECODE_INVAL)
      assert inst.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert inst.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      pnode = self.op.nodes[0]
    _CheckNodeOnline(self, pnode)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert inst.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode_info = self.cfg.GetNodeInfo(inst.primary_node)
    if not self.op.nodes or not old_pnode_info.offline:
      _CheckInstanceDown(self, inst, "cannot recreate disks")

    disk_count = len(inst.disks)
    if self.op.disks:
      for idx in self.op.disks:
        if idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      # no explicit selection means all disks
      self.op.disks = range(disk_count)

    if self.op.nodes and self.op.disks != range(disk_count):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    When replacement nodes were given, the primary node and the node
    pair stored in the logical id of each selected DRBD8 disk are
    updated and saved before the disks are created. Disks that were not
    selected are passed to the disk creation as indices to skip.

    """
    new_nodes = self.op.nodes

    # change primary node, if needed
    if new_nodes:
      self.instance.primary_node = new_nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    skipped = []
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.op.disks:
        # disk idx has not been passed in
        skipped.append(idx)
      elif new_nodes and dev.dev_type == constants.LD_DRBD8:
        # update secondaries for disks: the first two fields of the
        # logical id are replaced by the two new nodes
        assert len(new_nodes) == 2
        new_id = list(dev.logical_id)
        new_id[0], new_id[1] = new_nodes[0], new_nodes[1]
        dev.logical_id = tuple(new_id)

    if new_nodes:
      self.cfg.Update(self.instance, feedback_fn)

    _CreateDisks(self, self.instance, to_skip=skipped)
6081
6082
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    @raise errors.OpPrereqError: if an IP check is requested without a
        name check

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @rtype: dict
    @return: the per-instance hook environment plus "INSTANCE_NEW_NAME"

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same list (master node plus all instance nodes) for
        both hook phases

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    If the name check is enabled, the new name is resolved and replaced
    by the resolved name, which has to match the given one; with the IP
    check enabled the resolved address must not answer on the node
    daemon port. Finally the new name must not belong to another
    instance.

    @raise errors.OpPrereqError: if one of the checks fails

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      # compare the resolved *name*; comparing the hostname object itself
      # with a string made this message appear on every rename
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced. For file-based disk templates the storage directory is
    renamed on the primary node when the name actually changes. Finally
    the OS rename script is run with the disks activated; a failure of
    the script is only reported as a warning.

    @return: the name of the instance as read back from the
        configuration after the rename

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      # remember the directory before the configuration is changed
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # the rename in the configuration is already done, so only warn
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
6200
6201
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock and prepare the node lock computation.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks from the instance's nodes.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The per-instance hook environment is extended with the shutdown
    timeout of the opcode.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The pre phase uses only the master node; the post phase uses all
    nodes of the instance followed by the master node.

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(pre_nodes)
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down on its primary node and then removed via
    L{_RemoveInstance}. A failing shutdown aborts the removal unless the
    opcode asks to ignore failures, in which case it is only reported.

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    shutdown_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                      self.op.shutdown_timeout)
    failure = shutdown_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
6267
6268
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, configuration entry and lock.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report warnings to the caller
  @param instance: the instance object to remove
  @param ignore_failures: if true, a failed disk removal is only
      reported through C{feedback_fn} instead of aborting
  @raise errors.OpExecError: if the disks cannot be removed and
      failures are not ignored

  """
  name = instance.name

  logging.info("Removing block devices for instance %s", name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", name)

  lu.cfg.RemoveInstance(name)

  # nobody else must have scheduled an instance lock removal already
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = name
6289
6290
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object that is
  created in L{CheckArguments}.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_result = self.iq.OldStyleQuery(self)
    return query_result
6310
6311
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is done by a L{TLMigrateInstance} tasklet created in
  failover mode.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the optional "iallocator" and "target_node" opcode fields to
    the logical unit, using None for a missing field.

    """
    for field in ("iallocator", "target_node"):
      setattr(self, field, getattr(self.op, field, None))

  def ExpandNames(self):
    """Expand names, prepare the node locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    requested_target = self.op.target_node
    if requested_target is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, requested_target)

    # the node locks are computed in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 cleanup=False,
                                 failover=True,
                                 ignore_consistency=self.op.ignore_consistency,
                                 shutdown_timeout=self.op.shutdown_timeout)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked, and the
    lock recalculation is disabled. For all other templates the
    instance's nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      node_locks = locking.ALL_SET
    else:
      node_locks = [instance.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = node_locks
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    Exports the failover options and the old/new primary and secondary
    nodes, merged with the per-instance hook environment. For disk
    templates that are not internally mirrored both secondary values
    are the empty string.

    """
    instance = self._migrater.instance
    old_primary = instance.primary_node
    new_primary = self.op.target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = instance.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    hook_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": new_primary,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }

    hook_env.update(_BuildInstanceHookEnvByObject(self, instance))

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The pre phase uses the master node and the secondary nodes of the
    instance; the post phase additionally includes the primary node.

    """
    instance = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(instance.secondary_nodes)
    post_nodes = pre_nodes + [instance.primary_node]
    return (pre_nodes, post_nodes)
6391
6392
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is done by a
  L{TLMigrateInstance} tasklet created in migration mode.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names, prepare the node locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    requested_target = self.op.target_node
    if requested_target is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, requested_target)

    # the node locks are computed in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 cleanup=self.op.cleanup,
                                 failover=False,
                                 fallback=self.op.allow_failover)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked, and the
    lock recalculation is disabled. For all other templates the
    instance's nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      node_locks = locking.ALL_SET
    else:
      node_locks = [instance.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = node_locks
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    Starts from the per-instance hook environment and adds the
    migration options and the old/new primary and secondary nodes. For
    disk templates that are not internally mirrored both secondary
    values are None.

    """
    instance = self._migrater.instance
    old_primary = instance.primary_node
    new_primary = self.op.target_node

    hook_env = _BuildInstanceHookEnvByObject(self, instance)
    hook_env["MIGRATE_LIVE"] = self._migrater.live
    hook_env["MIGRATE_CLEANUP"] = self.op.cleanup
    hook_env["OLD_PRIMARY"] = old_primary
    hook_env["NEW_PRIMARY"] = new_primary

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # the two nodes swap their roles
      hook_env["OLD_SECONDARY"] = new_primary
      hook_env["NEW_SECONDARY"] = old_primary
    else:
      hook_env["OLD_SECONDARY"] = None
      hook_env["NEW_SECONDARY"] = None

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The pre phase uses the master node and the secondary nodes of the
    instance; the post phase additionally includes the primary node.

    """
    instance = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(instance.secondary_nodes)
    post_nodes = pre_nodes + [instance.primary_node]
    return (pre_nodes, post_nodes)
6464
6465
class LUInstanceMove(LogicalUnit):
  """Move an instance to another node by copying its disk data.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the opcode names and declare the initial locks.

    The instance is expanded and locked, the target node name is
    expanded and stored back into the opcode, and the node-level lock
    list starts with the target node, using the
    C{constants.LOCKS_APPEND} recalculation mode.

    """
    self._ExpandAndLockInstance()
    expanded_target = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = expanded_target
    self.needed_locks[locking.LEVEL_NODE] = [expanded_target]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Compute the node locks from the instance's primary node.

    @param level: the locking level being declared; only
        C{locking.LEVEL_NODE} is handled

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the target node and the shutdown timeout,
    updated with the generic per-instance hook environment.

    @return: the hooks environment dictionary

    """
    env = {}
    env["TARGET_NODE"] = self.op.target_node
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple holding the same list twice: the master node, the
        instance's current primary node and the target node

    """
    hook_nodes = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the target node differs from the current primary,
    that all disks are of type C{LD_LV} or C{LD_FILE}, that the target
    node passes the online, not-drained and vm-capable checks, that it
    has enough free memory (only when the instance is marked up) and
    that the instance's bridges exist there.

    @raise errors.OpPrereqError: if the instance is already on the
        target node or one of its disks has an unsupported type

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node_info = self.cfg.GetNodeInfo(self.op.target_node)
    assert node_info is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    target_node = node_info.name
    self.target_node = target_node

    if instance.primary_node == target_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    be_params = self.cfg.GetClusterInfo().FillBE(instance)

    # Only these disk types are accepted for a plain data copy
    for disk_idx, disk in enumerate(instance.disks):
      if disk.dev_type in (constants.LD_LV, constants.LD_FILE):
        continue
      raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                 " cannot copy" % disk_idx,
                                 errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if not instance.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started again, so the target needs the memory
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, be_params[constants.BE_MEMORY],
                           instance.hypervisor)

    # the bridges used by the instance must exist on the target node
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The instance is shut down on its current node, new disks are
    created on the target node and filled by exporting the data from the
    source node (slow), the configuration is updated, the old disks are
    removed and, if the instance is marked up, it is started on the
    target node.

    @param feedback_fn: feedback function, passed on to the
        configuration update
    @raise errors.OpExecError: if the shutdown fails (unless
        C{ignore_consistency} is set), if the disk copy fails, or if the
        disks or the instance cannot be started on the target node

    """
    instance = self.instance

    old_node = instance.primary_node
    new_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, old_node)

    shutdown_res = self.rpc.call_instance_shutdown(old_node, instance,
                                                   self.op.shutdown_timeout)
    shutdown_err = shutdown_res.fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, old_node, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, old_node, old_node, shutdown_err)

    # create the new disks on the target node; undo on failure
    try:
      _CreateDisks(self, instance, target_node=new_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=new_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    # activate each new disk, get its path and copy the data over;
    # the first failure stops the copy
    copy_errors = []
    for disk_idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", disk_idx)
      assemble_res = self.rpc.call_blockdev_assemble(new_node, disk,
                                                     instance.name, True,
                                                     disk_idx)
      if assemble_res.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        disk_idx, assemble_res.fail_msg)
        copy_errors.append(assemble_res.fail_msg)
        break
      new_dev_path = assemble_res.payload
      export_res = self.rpc.call_blockdev_export(old_node, disk,
                                                 new_node, new_dev_path,
                                                 cluster_name)
      if export_res.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        disk_idx, export_res.fail_msg)
        copy_errors.append(export_res.fail_msg)
        break

    if copy_errors:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=new_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(copy_errors),))

    # the data is on the target node now, so switch the configuration
    instance.primary_node = new_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=old_node)

    # Only start the instance if it's marked as up
    if not instance.admin_up:
      return

    self.LogInfo("Starting instance %s on node %s",
                 instance.name, new_node)

    disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    start_res = self.rpc.call_instance_start(new_node, instance, None, None)
    start_err = start_res.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, new_node, start_err))
6648
6649
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """No argument checks are done by this LU.

    """
    pass

  def ExpandNames(self):
    """Expand the node name and declare the lock on that node.

    Every lock level is given the value 1 in C{share_locks} and only
    the node being evacuated is requested at the node level.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    node_locks = {}
    node_locks[locking.LEVEL_NODE] = [node_name]
    self.needed_locks = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary holding only the name of the node

    """
    env = {}
    env["NODE_NAME"] = self.op.node_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple holding the same single-element list (the master
        node) twice

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """No prerequisites are checked by this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Submit one migration job per primary instance of the node.

    @param feedback_fn: feedback function (not used here)
    @return: a C{ResultWithJobs} with one single-opcode job for each
        instance returned by C{_GetNodePrimaryInstances}

    """
    # Prepare jobs for migration instances
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = opcodes.OpInstanceMigrate(instance_name=inst.name,
                                             mode=self.op.mode,
                                             live=self.op.live,
                                             iallocator=self.op.iallocator,
                                             target_node=self.op.target_node)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
6709
6710
6711 class TLMigrateInstance(Tasklet):
6712   """Tasklet class for instance migration.
6713
6714   @type live: boolean
6715   @ivar live: whether the migration will be done live or non-live;
6716       this variable is initalized only after CheckPrereq has run
6717   @type cleanup: boolean
6718   @ivar cleanup: Wheater we cleanup from a failed migration
6719   @type iallocator: string
6720   @ivar iallocator: The iallocator used to determine target_node
6721   @type target_node: string
6722   @ivar target_node: If given, the target_node to reallocate the instance to
6723   @type failover: boolean
6724   @ivar failover: Whether operation results in failover or migration
6725   @type fallback: boolean
6726   @ivar fallback: Whether fallback to failover is allowed if migration not
6727                   possible
6728   @type ignore_consistency: boolean
6729   @ivar ignore_consistency: Wheter we should ignore consistency between source
6730                             and target node
6731   @type shutdown_timeout: int
6732   @ivar shutdown_timeout: In case of failover timeout of the shutdown
6733
6734   """
6735   def __init__(self, lu, instance_name, cleanup=False,
6736                failover=False, fallback=False,
6737                ignore_consistency=False,
6738                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6739     """Initializes this class.
6740
6741     """
6742     Tasklet.__init__(self, lu)
6743
6744     # Parameters
6745     self.instance_name = instance_name
6746     self.cleanup = cleanup
6747     self.live = False # will be overridden later
6748     self.failover = failover
6749     self.fallback = fallback
6750     self.ignore_consistency = ignore_consistency
6751     self.shutdown_timeout = shutdown_timeout
6752
6753   def CheckPrereq(self):
6754     """Check prerequisites.
6755
6756     This checks that the instance is in the cluster.
6757
6758     """
6759     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6760     instance = self.cfg.GetInstanceInfo(instance_name)
6761     assert instance is not None
6762     self.instance = instance
6763
6764     if (not self.cleanup and not instance.admin_up and not self.failover and
6765         self.fallback):
6766       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6767                       " to failover")
6768       self.failover = True
6769
6770     if instance.disk_template not in constants.DTS_MIRRORED:
6771       if self.failover:
6772         text = "failovers"
6773       else:
6774         text = "migrations"
6775       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6776                                  " %s" % (instance.disk_template, text),
6777                                  errors.ECODE_STATE)
6778
6779     if instance.disk_template in constants.DTS_EXT_MIRROR:
6780       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6781
6782       if self.lu.op.iallocator:
6783         self._RunAllocator()
6784       else:
6785         # We set set self.target_node as it is required by
6786         # BuildHooksEnv
6787         self.target_node = self.lu.op.target_node
6788
6789       # self.target_node is already populated, either directly or by the
6790       # iallocator run
6791       target_node = self.target_node
6792       if self.target_node == instance.primary_node:
6793         raise errors.OpPrereqError("Cannot migrate instance %s"
6794                                    " to its primary (%s)" %
6795                                    (instance.name, instance.primary_node))
6796
6797       if len(self.lu.tasklets) == 1:
6798         # It is safe to release locks only when we're the only tasklet
6799         # in the LU
6800         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6801                       keep=[instance.primary_node, self.target_node])
6802
6803     else:
6804       secondary_nodes = instance.secondary_nodes
6805       if not secondary_nodes:
6806         raise errors.ConfigurationError("No secondary node but using"
6807                                         " %s disk template" %
6808                                         instance.disk_template)
6809       target_node = secondary_nodes[0]
6810       if self.lu.op.iallocator or (self.lu.op.target_node and
6811                                    self.lu.op.target_node != target_node):
6812         if self.failover:
6813           text = "failed over"
6814         else:
6815           text = "migrated"
6816         raise errors.OpPrereqError("Instances with disk template %s cannot"
6817                                    " be %s to arbitrary nodes"
6818                                    " (neither an iallocator nor a target"
6819                                    " node can be passed)" %
6820                                    (instance.disk_template, text),
6821                                    errors.ECODE_INVAL)
6822
6823     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6824
6825     # check memory requirements on the secondary node
6826     if not self.failover or instance.admin_up:
6827       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6828                            instance.name, i_be[constants.BE_MEMORY],
6829                            instance.hypervisor)
6830     else:
6831       self.lu.LogInfo("Not checking memory on the secondary node as"
6832                       " instance will not be started")
6833
6834     # check bridge existance
6835     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6836
6837     if not self.cleanup:
6838       _CheckNodeNotDrained(self.lu, target_node)
6839       if not self.failover:
6840         result = self.rpc.call_instance_migratable(instance.primary_node,
6841                                                    instance)
6842         if result.fail_msg and self.fallback:
6843           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6844                           " failover")
6845           self.failover = True
6846         else:
6847           result.Raise("Can't migrate, please use failover",
6848                        prereq=True, ecode=errors.ECODE_STATE)
6849
6850     assert not (self.failover and self.cleanup)
6851
6852     if not self.failover:
6853       if self.lu.op.live is not None and self.lu.op.mode is not None:
6854         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6855                                    " parameters are accepted",
6856                                    errors.ECODE_INVAL)
6857       if self.lu.op.live is not None:
6858         if self.lu.op.live:
6859           self.lu.op.mode = constants.HT_MIGRATION_LIVE
6860         else:
6861           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6862         # reset the 'live' parameter to None so that repeated
6863         # invocations of CheckPrereq do not raise an exception
6864         self.lu.op.live = None
6865       elif self.lu.op.mode is None:
6866         # read the default value from the hypervisor
6867         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6868                                                 skip_globals=False)
6869         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6870
6871       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6872     else:
6873       # Failover is never live
6874       self.live = False
6875
6876   def _RunAllocator(self):
6877     """Run the allocator based on input opcode.
6878
6879     """
6880     ial = IAllocator(self.cfg, self.rpc,
6881                      mode=constants.IALLOCATOR_MODE_RELOC,
6882                      name=self.instance_name,
6883                      # TODO See why hail breaks with a single node below
6884                      relocate_from=[self.instance.primary_node,
6885                                     self.instance.primary_node],
6886                      )
6887
6888     ial.Run(self.lu.op.iallocator)
6889
6890     if not ial.success:
6891       raise errors.OpPrereqError("Can't compute nodes using"
6892                                  " iallocator '%s': %s" %
6893                                  (self.lu.op.iallocator, ial.info),
6894                                  errors.ECODE_NORES)
6895     if len(ial.result) != ial.required_nodes:
6896       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6897                                  " of nodes (%s), required %s" %
6898                                  (self.lu.op.iallocator, len(ial.result),
6899                                   ial.required_nodes), errors.ECODE_FAULT)
6900     self.target_node = ial.result[0]
6901     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6902                  self.instance_name, self.lu.op.iallocator,
6903                  utils.CommaJoin(ial.result))
6904
6905   def _WaitUntilSync(self):
6906     """Poll with custom rpc for disk sync.
6907
6908     This uses our own step-based rpc call.
6909
6910     """
6911     self.feedback_fn("* wait until resync is done")
6912     all_done = False
6913     while not all_done:
6914       all_done = True
6915       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6916                                             self.nodes_ip,
6917                                             self.instance.disks)
6918       min_percent = 100
6919       for node, nres in result.items():
6920         nres.Raise("Cannot resync disks on node %s" % node)
6921         node_done, node_percent = nres.payload
6922         all_done = all_done and node_done
6923         if node_percent is not None:
6924           min_percent = min(min_percent, node_percent)
6925       if not all_done:
6926         if min_percent < 100:
6927           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6928         time.sleep(2)
6929
6930   def _EnsureSecondary(self, node):
6931     """Demote a node to secondary.
6932
6933     """
6934     self.feedback_fn("* switching node %s to secondary mode" % node)
6935
6936     for dev in self.instance.disks:
6937       self.cfg.SetDiskID(dev, node)
6938
6939     result = self.rpc.call_blockdev_close(node, self.instance.name,
6940                                           self.instance.disks)
6941     result.Raise("Cannot change disk to secondary on node %s" % node)
6942
6943   def _GoStandalone(self):
6944     """Disconnect from the network.
6945
6946     """
6947     self.feedback_fn("* changing into standalone mode")
6948     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6949                                                self.instance.disks)
6950     for node, nres in result.items():
6951       nres.Raise("Cannot disconnect disks node %s" % node)
6952
6953   def _GoReconnect(self, multimaster):
6954     """Reconnect to the network.
6955
6956     """
6957     if multimaster:
6958       msg = "dual-master"
6959     else:
6960       msg = "single-master"
6961     self.feedback_fn("* changing disks into %s mode" % msg)
6962     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6963                                            self.instance.disks,
6964                                            self.instance.name, multimaster)
6965     for node, nres in result.items():
6966       nres.Raise("Cannot change disks config on node %s" % node)
6967
6968   def _ExecCleanup(self):
6969     """Try to cleanup after a failed migration.
6970
6971     The cleanup is done by:
6972       - check that the instance is running only on one node
6973         (and update the config if needed)
6974       - change disks on its secondary node to secondary
6975       - wait until disks are fully synchronized
6976       - disconnect from the network
6977       - change disks into single-master mode
6978       - wait again until disks are fully synchronized
6979
6980     """
6981     instance = self.instance
6982     target_node = self.target_node
6983     source_node = self.source_node
6984
6985     # check running on only one node
6986     self.feedback_fn("* checking where the instance actually runs"
6987                      " (if this hangs, the hypervisor might be in"
6988                      " a bad state)")
6989     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6990     for node, result in ins_l.items():
6991       result.Raise("Can't contact node %s" % node)
6992
6993     runningon_source = instance.name in ins_l[source_node].payload
6994     runningon_target = instance.name in ins_l[target_node].payload
6995
6996     if runningon_source and runningon_target:
6997       raise errors.OpExecError("Instance seems to be running on two nodes,"
6998                                " or the hypervisor is confused; you will have"
6999                                " to ensure manually that it runs only on one"
7000                                " and restart this operation")
7001
7002     if not (runningon_source or runningon_target):
7003       raise errors.OpExecError("Instance does not seem to be running at all;"
7004                                " in this case it's safer to repair by"
7005                                " running 'gnt-instance stop' to ensure disk"
7006                                " shutdown, and then restarting it")
7007
7008     if runningon_target:
7009       # the migration has actually succeeded, we need to update the config
7010       self.feedback_fn("* instance running on secondary node (%s),"
7011                        " updating config" % target_node)
7012       instance.primary_node = target_node
7013       self.cfg.Update(instance, self.feedback_fn)
7014       demoted_node = source_node
7015     else:
7016       self.feedback_fn("* instance confirmed to be running on its"
7017                        " primary node (%s)" % source_node)
7018       demoted_node = target_node
7019
7020     if instance.disk_template in constants.DTS_INT_MIRROR:
7021       self._EnsureSecondary(demoted_node)
7022       try:
7023         self._WaitUntilSync()
7024       except errors.OpExecError:
7025         # we ignore here errors, since if the device is standalone, it
7026         # won't be able to sync
7027         pass
7028       self._GoStandalone()
7029       self._GoReconnect(False)
7030       self._WaitUntilSync()
7031
7032     self.feedback_fn("* done")
7033
7034   def _RevertDiskStatus(self):
7035     """Try to revert the disk status after a failed migration.
7036
7037     """
7038     target_node = self.target_node
7039     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7040       return
7041
7042     try:
7043       self._EnsureSecondary(target_node)
7044       self._GoStandalone()
7045       self._GoReconnect(False)
7046       self._WaitUntilSync()
7047     except errors.OpExecError, err:
7048       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7049                          " please try to recover the instance manually;"
7050                          " error '%s'" % str(err))
7051
7052   def _AbortMigration(self):
7053     """Call the hypervisor code to abort a started migration.
7054
7055     """
7056     instance = self.instance
7057     target_node = self.target_node
7058     migration_info = self.migration_info
7059
7060     abort_result = self.rpc.call_finalize_migration(target_node,
7061                                                     instance,
7062                                                     migration_info,
7063                                                     False)
7064     abort_msg = abort_result.fail_msg
7065     if abort_msg:
7066       logging.error("Aborting migration failed on target node %s: %s",
7067                     target_node, abort_msg)
7068       # Don't raise an exception here, as we stil have to try to revert the
7069       # disk status, even if this step failed.
7070
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - check the disk consistency on the target node
      - fetch the migration information from the source node
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - prepare the target node to accept the instance
      - migrate the instance
      - update the configuration and finalize the migration
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are skipped for externally mirrored disk
    templates. If preparing the target or the migration itself fails, the
    migration is aborted and the disk status is reverted before the error
    is raised.

    @raise errors.OpExecError: if a disk is degraded on the target node or
        any of the migration steps fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # Also kept on the tasklet, as _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      # Roll back: abort on the target node, then restore the disk status
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      # Same rollback as for a failed pre-migration step
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Tell the target node that the migration succeeded
    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Demote the old primary and go back to single-master mode
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
7165
7166   def _ExecFailover(self):
7167     """Failover an instance.
7168
7169     The failover is done by shutting it down on its present node and
7170     starting it on the secondary.
7171
7172     """
7173     instance = self.instance
7174     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7175
7176     source_node = instance.primary_node
7177     target_node = self.target_node
7178
7179     if instance.admin_up:
7180       self.feedback_fn("* checking disk consistency between source and target")
7181       for dev in instance.disks:
7182         # for drbd, these are drbd over lvm
7183         if not _CheckDiskConsistency(self, dev, target_node, False):
7184           if not self.ignore_consistency:
7185             raise errors.OpExecError("Disk %s is degraded on target node,"
7186                                      " aborting failover" % dev.iv_name)
7187     else:
7188       self.feedback_fn("* not checking disk consistency as instance is not"
7189                        " running")
7190
7191     self.feedback_fn("* shutting down instance on source node")
7192     logging.info("Shutting down instance %s on node %s",
7193                  instance.name, source_node)
7194
7195     result = self.rpc.call_instance_shutdown(source_node, instance,
7196                                              self.shutdown_timeout)
7197     msg = result.fail_msg
7198     if msg:
7199       if self.ignore_consistency or primary_node.offline:
7200         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7201                            " proceeding anyway; please make sure node"
7202                            " %s is down; error details: %s",
7203                            instance.name, source_node, source_node, msg)
7204       else:
7205         raise errors.OpExecError("Could not shutdown instance %s on"
7206                                  " node %s: %s" %
7207                                  (instance.name, source_node, msg))
7208
7209     self.feedback_fn("* deactivating the instance's disks on source node")
7210     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7211       raise errors.OpExecError("Can't shut down the instance's disks.")
7212
7213     instance.primary_node = target_node
7214     # distribute new instance config to the other nodes
7215     self.cfg.Update(instance, self.feedback_fn)
7216
7217     # Only start the instance if it's marked as up
7218     if instance.admin_up:
7219       self.feedback_fn("* activating the instance's disks on target node")
7220       logging.info("Starting instance %s on node %s",
7221                    instance.name, target_node)
7222
7223       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7224                                            ignore_secondaries=True)
7225       if not disks_ok:
7226         _ShutdownInstanceDisks(self, instance)
7227         raise errors.OpExecError("Can't activate the instance's disks")
7228
7229       self.feedback_fn("* starting the instance on the target node")
7230       result = self.rpc.call_instance_start(target_node, instance, None, None)
7231       msg = result.fail_msg
7232       if msg:
7233         _ShutdownInstanceDisks(self, instance)
7234         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7235                                  (instance.name, target_node, msg))
7236
7237   def Exec(self, feedback_fn):
7238     """Perform the migration.
7239
7240     """
7241     self.feedback_fn = feedback_fn
7242     self.source_node = self.instance.primary_node
7243
7244     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7245     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7246       self.target_node = self.instance.secondary_nodes[0]
7247       # Otherwise self.target_node has been populated either
7248       # directly, or through an iallocator.
7249
7250     self.all_nodes = [self.source_node, self.target_node]
7251     self.nodes_ip = {
7252       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7253       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7254       }
7255
7256     if self.failover:
7257       feedback_fn("Failover instance %s" % self.instance.name)
7258       self._ExecFailover()
7259     else:
7260       feedback_fn("Migrating instance %s" % self.instance.name)
7261
7262       if self.cleanup:
7263         return self._ExecCleanup()
7264       else:
7265         return self._ExecMigration()
7266
7267
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are always visited first. The device itself
  is created only if creation is forced, either by the caller or because
  the device reports C{CreateOnSecondary()}; in the latter case the
  creation is forced for all of its children as well.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      C{CreateOnSecondary()} is true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed on unchanged to
      L{_CreateSingleBlockDev} for every device that gets created

  """
  create_here = force_create
  if device.CreateOnSecondary():
    create_here = True

  # Children first, so that they exist before their parent is created
  for child in device.children or []:
    _CreateBlockDev(lu, node, instance, child, create_here,
                    info, force_open)

  if create_here:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7308
7309
7310 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7311   """Create a single block device on a given node.
7312
7313   This will not recurse over children of the device, so they must be
7314   created in advance.
7315
7316   @param lu: the lu on whose behalf we execute
7317   @param node: the node on which to create the device
7318   @type instance: L{objects.Instance}
7319   @param instance: the instance which owns the device
7320   @type device: L{objects.Disk}
7321   @param device: the device to create
7322   @param info: the extra 'metadata' we should attach to the device
7323       (this will be represented as a LVM tag)
7324   @type force_open: boolean
7325   @param force_open: this parameter will be passes to the
7326       L{backend.BlockdevCreate} function where it specifies
7327       whether we run on primary or not, and it affects both
7328       the child assembly and the device own Open() execution
7329
7330   """
7331   lu.cfg.SetDiskID(device, node)
7332   result = lu.rpc.call_blockdev_create(node, device, device.size,
7333                                        instance.name, force_open, info)
7334   result.Raise("Can't create block device %s on"
7335                " node %s for instance %s" % (device, node, instance.name))
7336   if device.physical_id is None:
7337     device.physical_id = result.payload
7338
7339
7340 def _GenerateUniqueNames(lu, exts):
7341   """Generate a suitable LV name.
7342
7343   This will generate a logical volume name for the given instance.
7344
7345   """
7346   results = []
7347   for val in exts:
7348     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7349     results.append("%s%s" % (new_id, val))
7350   return results
7351
7352
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  A port and a shared secret are requested from the configuration; the
  resulting DRBD8 disk has two LV children, the first one for the data
  (of the requested size) and the second one for the metadata (size 128).

  @param lu: the logical unit on whose behalf we execute
  @param primary: first node of the DRBD logical ID
  @param secondary: second node of the DRBD logical ID
  @param size: size of the DRBD device and of its data LV
  @param vgnames: two volume group names, for the data and the meta LV
  @param names: two LV names, for the data and the meta LV
  @param iv_name: the C{iv_name} given to the DRBD device
  @param p_minor: DRBD minor stored for the primary node
  @param s_minor: DRBD minor stored for the secondary node
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk object

  """
  assert len(vgnames) == len(names) == 2

  cfg = lu.cfg
  port = cfg.AllocatePort()
  secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgnames[1], names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
7372
7373
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  This only builds the L{objects.Disk} objects, requesting unique names,
  DRBD minors, ports and secrets from the configuration where the
  template needs them.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template, one of the C{constants.DT_*}
      values handled below
  @param instance_name: instance name, passed to the DRBD minor allocation
  @param primary_node: the primary node (used for DRBD8 only)
  @param secondary_nodes: list of secondary nodes; must hold exactly one
      node for DRBD8 and be empty for every other template that checks it
  @param disk_info: list of dicts with the C{constants.IDISK_*} parameters
      of each disk (size and mode are always read; vg, metavg and adopt
      depending on the template)
  @param file_storage_dir: directory used in the logical ID of file disks
  @param file_driver: driver used in the logical ID of file disks
  @param base_index: index of the first generated disk
  @param feedback_fn: callable taking a message string; called once per
      disk for the plain template
  @rtype: list
  @return: the generated L{objects.Disk} objects
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)

  def _UniqueLvPrefixes():
    """Request one unique LV name prefix per disk.

    """
    return _GenerateUniqueNames(lu, [".disk%d" % (base_index + num)
                                     for num in range(num_disks)])

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _UniqueLvPrefixes()
    plain_disks = []
    for (idx, params) in enumerate(disk_info):
      disk_index = idx + base_index
      vg = params.get(constants.IDISK_VG, default_vg)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, lv_names[idx]))
      plain_disks.append(objects.Disk(dev_type=constants.LD_LV,
                                      size=params[constants.IDISK_SIZE],
                                      logical_id=(vg, lv_names[idx]),
                                      iv_name="disk/%d" % disk_index,
                                      mode=params[constants.IDISK_MODE]))
    return plain_disks

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk: one for the primary, one for the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # One [data, meta] pair of LV names per disk
    lv_pairs = [[prefix + "_data", prefix + "_meta"]
                for prefix in _UniqueLvPrefixes()]

    drbd_disks = []
    for (idx, params) in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = params.get(constants.IDISK_VG, default_vg)
      meta_vg = params.get(constants.IDISK_METAVG, data_vg)
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      params[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      lv_pairs[idx],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      drbd_dev.mode = params[constants.IDISK_MODE]
      drbd_disks.append(drbd_dev)
    return drbd_disks

  if template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    # Both file-based templates produce the same layout; they only differ
    # in the opcodes.Require* check which is run first
    if template_name == constants.DT_FILE:
      opcodes.RequireFileStorage()
    else:
      opcodes.RequireSharedFileStorage()

    file_disks = []
    for (idx, params) in enumerate(disk_info):
      disk_index = idx + base_index
      file_disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                     size=params[constants.IDISK_SIZE],
                                     iv_name="disk/%d" % disk_index,
                                     logical_id=(file_driver,
                                                 "%s/disk%d" %
                                                 (file_storage_dir,
                                                  disk_index)),
                                     mode=params[constants.IDISK_MODE]))
    return file_disks

  if template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    block_disks = []
    for (idx, params) in enumerate(disk_info):
      disk_index = idx + base_index
      block_id = (constants.BLOCKDEV_DRIVER_MANUAL,
                  params[constants.IDISK_ADOPT])
      block_disks.append(objects.Disk(dev_type=constants.LD_BLOCKDEV,
                                      size=params[constants.IDISK_SIZE],
                                      logical_id=block_id,
                                      iv_name="disk/%d" % disk_index,
                                      mode=params[constants.IDISK_MODE]))
    return block_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7478
7479
7480 def _GetInstanceInfoText(instance):
7481   """Compute that text that should be added to the disk's metadata.
7482
7483   """
7484   return "originstname+%s" % instance.name
7485
7486
7487 def _CalcEta(time_taken, written, total_size):
7488   """Calculates the ETA based on size written and total size.
7489
7490   @param time_taken: The time taken so far
7491   @param written: amount written so far
7492   @param total_size: The total size of data to be written
7493   @return: The remaining time in seconds
7494
7495   """
7496   avg_time = time_taken / float(written)
7497   return (total_size - written) * avg_time
7498
7499
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Synchronisation of the instance's disks is paused on the primary node
  for the duration of the wipe and resumed afterwards, also when the wipe
  fails. Each disk is wiped in chunks; for every disk the progress is
  reported through C{lu.LogInfo} after the first chunk and then at most
  once per minute.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: None; a failing pause or wipe RPC is reported through the
      C{Raise} method of the RPC result, a failing resume is only logged

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # Check the RPC itself before looking at the per-disk payload, which
  # presumably is not a list when the call as a whole failed
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("pause-sync of instance %s for disks %d failed",
                      instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; the truncation must not result in an empty chunk though,
      # as the loop below would never advance
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      # 0 makes sure the progress is reported after the first chunk
      last_output = 0
      start_time = time.time()

      while offset < size:
        # The last chunk may be smaller than the others
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    # Only warn here: raising from the finally clause would hide an error
    # raised by the wipe itself
    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warning("resume-sync of instance %s for disks %d failed",
                          instance.name, idx)
7567
7568
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the directory of the first disk is created first. Every disk
  is then created on all involved nodes; creation and opening are forced
  only on the primary (or target) node.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation;
      the disks are then created on this node only
  @return: None; a failing directory creation is reported through the
      C{Raise} method of the RPC result

  """
  disk_tag = _GetInstanceInfoText(instance)

  if target_node is not None:
    primary = target_node
    nodes = [primary]
  else:
    primary = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    first_disk_path = instance.disks[0].logical_id[1]
    storage_dir = os.path.dirname(first_disk_path)
    dir_result = lu.rpc.call_file_storage_dir_create(primary, storage_dir)
    dir_result.Raise("Failed to create directory '%s' on node %s" %
                     (storage_dir, primary))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in nodes:
      on_primary = (node == primary)
      _CreateBlockDev(lu, node, instance, device, on_primary, disk_tag,
                      on_primary)
7612
7613
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For the file disk template the directory of the first disk is removed
  as well, on the target node if given and on the primary node otherwise.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # Failures are only logged so that the other devices still get
        # removed
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the removal was actually attempted on, which is
      # not the primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
7661
7662
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @param disk_template: the disk template, one of the C{constants.DT_*}
      values listed below
  @param disks: list of dicts, each holding at least the
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: the required space keyed by volume group name; empty for
      templates which need no volume group space
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  def _compute(disks, payload):
    """Sum up size plus per-disk overhead for each volume group.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      # Accumulate under the disk's own VG name, so that several disks in
      # the same volume group add up instead of overwriting each other
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
7693
7694
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirement of a disk template.

  @param disk_template: the disk template, one of the C{constants.DT_*}
      values listed below
  @param disks: iterable of dicts holding the C{constants.IDISK_SIZE} key
  @return: the sum of the disk sizes for plain, the same plus 128 per
      disk for drbd8, C{None} for diskless and file, C{0} for shared file
      and block
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  try:
    return required[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
7715
7716
7717 def _FilterVmNodes(lu, nodenames):
7718   """Filters out non-vm_capable nodes from a list.
7719
7720   @type lu: L{LogicalUnit}
7721   @param lu: the logical unit for which we check
7722   @type nodenames: list
7723   @param nodenames: the list of nodes on which we should check
7724   @rtype: list
7725   @return: the list of vm-capable nodes
7726
7727   """
7728   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7729   return [name for name in nodenames if name not in vm_nodes]
7730
7731
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify. Only the nodes
  returned by L{_FilterVmNodes} are asked, and results flagged as offline
  are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  results = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                   hvparams)
  for node in vm_nodes:
    node_result = results[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
7758
7759
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  Only the nodes returned by L{_FilterVmNodes} are asked. A node result
  with an empty payload is only logged as "OS not found".

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should validate
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  validation = lu.rpc.call_os_validate(required, vm_nodes, osname,
                                       checks, osparams)
  for (node, nres) in validation.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if nres.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
7788
7789
7790 class LUInstanceCreate(LogicalUnit):
7791   """Create an instance.
7792
7793   """
7794   HPATH = "instance-add"
7795   HTYPE = constants.HTYPE_INSTANCE
7796   REQ_BGL = False
7797
7798   def CheckArguments(self):
7799     """Check arguments.
7800
7801     """
7802     # do not require name_check to ease forward/backward compatibility
7803     # for tools
7804     if self.op.no_install and self.op.start:
7805       self.LogInfo("No-installation mode selected, disabling startup")
7806       self.op.start = False
7807     # validate/normalize the instance name
7808     self.op.instance_name = \
7809       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7810
7811     if self.op.ip_check and not self.op.name_check:
7812       # TODO: make the ip check more flexible and not depend on the name check
7813       raise errors.OpPrereqError("Cannot do IP address check without a name"
7814                                  " check", errors.ECODE_INVAL)
7815
7816     # check nics' parameter names
7817     for nic in self.op.nics:
7818       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7819
7820     # check disks. parameter names and consistent adopt/no-adopt strategy
7821     has_adopt = has_no_adopt = False
7822     for disk in self.op.disks:
7823       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7824       if constants.IDISK_ADOPT in disk:
7825         has_adopt = True
7826       else:
7827         has_no_adopt = True
7828     if has_adopt and has_no_adopt:
7829       raise errors.OpPrereqError("Either all disks are adopted or none is",
7830                                  errors.ECODE_INVAL)
7831     if has_adopt:
7832       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7833         raise errors.OpPrereqError("Disk adoption is not supported for the"
7834                                    " '%s' disk template" %
7835                                    self.op.disk_template,
7836                                    errors.ECODE_INVAL)
7837       if self.op.iallocator is not None:
7838         raise errors.OpPrereqError("Disk adoption not allowed with an"
7839                                    " iallocator script", errors.ECODE_INVAL)
7840       if self.op.mode == constants.INSTANCE_IMPORT:
7841         raise errors.OpPrereqError("Disk adoption not allowed for"
7842                                    " instance import", errors.ECODE_INVAL)
7843     else:
7844       if self.op.disk_template in constants.DTS_MUST_ADOPT:
7845         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7846                                    " but no 'adopt' parameter given" %
7847                                    self.op.disk_template,
7848                                    errors.ECODE_INVAL)
7849
7850     self.adopt_disks = has_adopt
7851
7852     # instance name verification
7853     if self.op.name_check:
7854       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7855       self.op.instance_name = self.hostname1.name
7856       # used in CheckPrereq for ip ping check
7857       self.check_ip = self.hostname1.ip
7858     else:
7859       self.check_ip = None
7860
7861     # file storage checks
7862     if (self.op.file_driver and
7863         not self.op.file_driver in constants.FILE_DRIVER):
7864       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7865                                  self.op.file_driver, errors.ECODE_INVAL)
7866
7867     if (self.op.disk_template == constants.DT_FILE and
7868         not constants.ENABLE_FILE_STORAGE):
7869       raise errors.OpPrereqError("File storage disabled")
7870     elif (self.op.disk_template == constants.DT_SHARED_FILE and
7871           not constants.ENABLE_SHARED_FILE_STORAGE):
7872       raise errors.OpPrereqError("Shared file storage disabled")
7873
7874     ### Node/iallocator related checks
7875     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7876
7877     if self.op.pnode is not None:
7878       if self.op.disk_template in constants.DTS_INT_MIRROR:
7879         if self.op.snode is None:
7880           raise errors.OpPrereqError("The networked disk templates need"
7881                                      " a mirror node", errors.ECODE_INVAL)
7882       elif self.op.snode:
7883         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7884                         " template")
7885         self.op.snode = None
7886
7887     self._cds = _GetClusterDomainSecret()
7888
7889     if self.op.mode == constants.INSTANCE_IMPORT:
7890       # On import force_variant must be True, because if we forced it at
7891       # initial install, our only chance when importing it back is that it
7892       # works again!
7893       self.op.force_variant = True
7894
7895       if self.op.no_install:
7896         self.LogInfo("No-installation mode has no effect during import")
7897
7898     elif self.op.mode == constants.INSTANCE_CREATE:
7899       if self.op.os_type is None:
7900         raise errors.OpPrereqError("No guest OS specified",
7901                                    errors.ECODE_INVAL)
7902       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7903         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7904                                    " installation" % self.op.os_type,
7905                                    errors.ECODE_STATE)
7906       if self.op.disk_template is None:
7907         raise errors.OpPrereqError("No disk template specified",
7908                                    errors.ECODE_INVAL)
7909
7910     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7911       # Check handshake to ensure both clusters have the same domain secret
7912       src_handshake = self.op.source_handshake
7913       if not src_handshake:
7914         raise errors.OpPrereqError("Missing source handshake",
7915                                    errors.ECODE_INVAL)
7916
7917       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7918                                                            src_handshake)
7919       if errmsg:
7920         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7921                                    errors.ECODE_INVAL)
7922
7923       # Load and check source CA
7924       self.source_x509_ca_pem = self.op.source_x509_ca
7925       if not self.source_x509_ca_pem:
7926         raise errors.OpPrereqError("Missing source X509 CA",
7927                                    errors.ECODE_INVAL)
7928
7929       try:
7930         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7931                                                     self._cds)
7932       except OpenSSL.crypto.Error, err:
7933         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7934                                    (err, ), errors.ECODE_INVAL)
7935
7936       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7937       if errcode is not None:
7938         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7939                                    errors.ECODE_INVAL)
7940
7941       self.source_x509_ca = cert
7942
7943       src_instance_name = self.op.source_instance_name
7944       if not src_instance_name:
7945         raise errors.OpPrereqError("Missing source instance name",
7946                                    errors.ECODE_INVAL)
7947
7948       self.source_instance_name = \
7949           netutils.GetHostname(name=src_instance_name).name
7950
7951     else:
7952       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7953                                  self.op.mode, errors.ECODE_INVAL)
7954
7955   def ExpandNames(self):
7956     """ExpandNames for CreateInstance.
7957
7958     Figure out the right locks for instance creation.
7959
7960     """
7961     self.needed_locks = {}
7962
7963     instance_name = self.op.instance_name
7964     # this is just a preventive check, but someone might still add this
7965     # instance in the meantime, and creation will fail at lock-add time
7966     if instance_name in self.cfg.GetInstanceList():
7967       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7968                                  instance_name, errors.ECODE_EXISTS)
7969
7970     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7971
7972     if self.op.iallocator:
7973       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7974     else:
7975       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7976       nodelist = [self.op.pnode]
7977       if self.op.snode is not None:
7978         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7979         nodelist.append(self.op.snode)
7980       self.needed_locks[locking.LEVEL_NODE] = nodelist
7981
7982     # in case of import lock the source node too
7983     if self.op.mode == constants.INSTANCE_IMPORT:
7984       src_node = self.op.src_node
7985       src_path = self.op.src_path
7986
7987       if src_path is None:
7988         self.op.src_path = src_path = self.op.instance_name
7989
7990       if src_node is None:
7991         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7992         self.op.src_node = None
7993         if os.path.isabs(src_path):
7994           raise errors.OpPrereqError("Importing an instance from an absolute"
7995                                      " path requires a source node option",
7996                                      errors.ECODE_INVAL)
7997       else:
7998         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7999         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8000           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8001         if not os.path.isabs(src_path):
8002           self.op.src_path = src_path = \
8003             utils.PathJoin(constants.EXPORT_DIR, src_path)
8004
8005   def _RunAllocator(self):
8006     """Run the allocator based on input opcode.
8007
8008     """
8009     nics = [n.ToDict() for n in self.nics]
8010     ial = IAllocator(self.cfg, self.rpc,
8011                      mode=constants.IALLOCATOR_MODE_ALLOC,
8012                      name=self.op.instance_name,
8013                      disk_template=self.op.disk_template,
8014                      tags=self.op.tags,
8015                      os=self.op.os_type,
8016                      vcpus=self.be_full[constants.BE_VCPUS],
8017                      memory=self.be_full[constants.BE_MEMORY],
8018                      disks=self.disks,
8019                      nics=nics,
8020                      hypervisor=self.op.hypervisor,
8021                      )
8022
8023     ial.Run(self.op.iallocator)
8024
8025     if not ial.success:
8026       raise errors.OpPrereqError("Can't compute nodes using"
8027                                  " iallocator '%s': %s" %
8028                                  (self.op.iallocator, ial.info),
8029                                  errors.ECODE_NORES)
8030     if len(ial.result) != ial.required_nodes:
8031       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8032                                  " of nodes (%s), required %s" %
8033                                  (self.op.iallocator, len(ial.result),
8034                                   ial.required_nodes), errors.ECODE_FAULT)
8035     self.op.pnode = ial.result[0]
8036     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8037                  self.op.instance_name, self.op.iallocator,
8038                  utils.CommaJoin(ial.result))
8039     if ial.required_nodes == 2:
8040       self.op.snode = ial.result[1]
8041
8042   def BuildHooksEnv(self):
8043     """Build hooks env.
8044
8045     This runs on master, primary and secondary nodes of the instance.
8046
8047     """
8048     env = {
8049       "ADD_MODE": self.op.mode,
8050       }
8051     if self.op.mode == constants.INSTANCE_IMPORT:
8052       env["SRC_NODE"] = self.op.src_node
8053       env["SRC_PATH"] = self.op.src_path
8054       env["SRC_IMAGES"] = self.src_images
8055
8056     env.update(_BuildInstanceHookEnv(
8057       name=self.op.instance_name,
8058       primary_node=self.op.pnode,
8059       secondary_nodes=self.secondaries,
8060       status=self.op.start,
8061       os_type=self.op.os_type,
8062       memory=self.be_full[constants.BE_MEMORY],
8063       vcpus=self.be_full[constants.BE_VCPUS],
8064       nics=_NICListToTuple(self, self.nics),
8065       disk_template=self.op.disk_template,
8066       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8067              for d in self.disks],
8068       bep=self.be_full,
8069       hvp=self.hv_full,
8070       hypervisor_name=self.op.hypervisor,
8071       tags=self.op.tags,
8072     ))
8073
8074     return env
8075
8076   def BuildHooksNodes(self):
8077     """Build hooks nodes.
8078
8079     """
8080     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8081     return nl, nl
8082
8083   def _ReadExportInfo(self):
8084     """Reads the export information from disk.
8085
8086     It will override the opcode source node and path with the actual
8087     information, if these two were not specified before.
8088
8089     @return: the export information
8090
8091     """
8092     assert self.op.mode == constants.INSTANCE_IMPORT
8093
8094     src_node = self.op.src_node
8095     src_path = self.op.src_path
8096
8097     if src_node is None:
8098       locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8099       exp_list = self.rpc.call_export_list(locked_nodes)
8100       found = False
8101       for node in exp_list:
8102         if exp_list[node].fail_msg:
8103           continue
8104         if src_path in exp_list[node].payload:
8105           found = True
8106           self.op.src_node = src_node = node
8107           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8108                                                        src_path)
8109           break
8110       if not found:
8111         raise errors.OpPrereqError("No export found for relative path %s" %
8112                                     src_path, errors.ECODE_INVAL)
8113
8114     _CheckNodeOnline(self, src_node)
8115     result = self.rpc.call_export_info(src_node, src_path)
8116     result.Raise("No export or invalid export found in dir %s" % src_path)
8117
8118     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8119     if not export_info.has_section(constants.INISECT_EXP):
8120       raise errors.ProgrammerError("Corrupted export config",
8121                                    errors.ECODE_ENVIRON)
8122
8123     ei_version = export_info.get(constants.INISECT_EXP, "version")
8124     if (int(ei_version) != constants.EXPORT_VERSION):
8125       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8126                                  (ei_version, constants.EXPORT_VERSION),
8127                                  errors.ECODE_ENVIRON)
8128     return export_info
8129
8130   def _ReadExportParams(self, einfo):
8131     """Use export parameters as defaults.
8132
8133     In case the opcode doesn't specify (as in override) some instance
8134     parameters, then try to use them from the export information, if
8135     that declares them.
8136
8137     """
8138     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8139
8140     if self.op.disk_template is None:
8141       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8142         self.op.disk_template = einfo.get(constants.INISECT_INS,
8143                                           "disk_template")
8144       else:
8145         raise errors.OpPrereqError("No disk template specified and the export"
8146                                    " is missing the disk_template information",
8147                                    errors.ECODE_INVAL)
8148
8149     if not self.op.disks:
8150       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8151         disks = []
8152         # TODO: import the disk iv_name too
8153         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8154           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8155           disks.append({constants.IDISK_SIZE: disk_sz})
8156         self.op.disks = disks
8157       else:
8158         raise errors.OpPrereqError("No disk info specified and the export"
8159                                    " is missing the disk information",
8160                                    errors.ECODE_INVAL)
8161
8162     if (not self.op.nics and
8163         einfo.has_option(constants.INISECT_INS, "nic_count")):
8164       nics = []
8165       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8166         ndict = {}
8167         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8168           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8169           ndict[name] = v
8170         nics.append(ndict)
8171       self.op.nics = nics
8172
8173     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8174       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8175
8176     if (self.op.hypervisor is None and
8177         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8178       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8179
8180     if einfo.has_section(constants.INISECT_HYP):
8181       # use the export parameters but do not override the ones
8182       # specified by the user
8183       for name, value in einfo.items(constants.INISECT_HYP):
8184         if name not in self.op.hvparams:
8185           self.op.hvparams[name] = value
8186
8187     if einfo.has_section(constants.INISECT_BEP):
8188       # use the parameters, without overriding
8189       for name, value in einfo.items(constants.INISECT_BEP):
8190         if name not in self.op.beparams:
8191           self.op.beparams[name] = value
8192     else:
8193       # try to read the parameters old style, from the main section
8194       for name in constants.BES_PARAMETERS:
8195         if (name not in self.op.beparams and
8196             einfo.has_option(constants.INISECT_INS, name)):
8197           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8198
8199     if einfo.has_section(constants.INISECT_OSP):
8200       # use the parameters, without overriding
8201       for name, value in einfo.items(constants.INISECT_OSP):
8202         if name not in self.op.osparams:
8203           self.op.osparams[name] = value
8204
8205   def _RevertToDefaults(self, cluster):
8206     """Revert the instance parameters to the default values.
8207
8208     """
8209     # hvparams
8210     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8211     for name in self.op.hvparams.keys():
8212       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8213         del self.op.hvparams[name]
8214     # beparams
8215     be_defs = cluster.SimpleFillBE({})
8216     for name in self.op.beparams.keys():
8217       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8218         del self.op.beparams[name]
8219     # nic params
8220     nic_defs = cluster.SimpleFillNIC({})
8221     for nic in self.op.nics:
8222       for name in constants.NICS_PARAMETERS:
8223         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8224           del nic[name]
8225     # osparams
8226     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8227     for name in self.op.osparams.keys():
8228       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8229         del self.op.osparams[name]
8230
8231   def _CalculateFileStorageDir(self):
8232     """Calculate final instance file storage dir.
8233
8234     """
8235     # file storage dir calculation/check
8236     self.instance_file_storage_dir = None
8237     if self.op.disk_template in constants.DTS_FILEBASED:
8238       # build the full file storage dir path
8239       joinargs = []
8240
8241       if self.op.disk_template == constants.DT_SHARED_FILE:
8242         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8243       else:
8244         get_fsd_fn = self.cfg.GetFileStorageDir
8245
8246       cfg_storagedir = get_fsd_fn()
8247       if not cfg_storagedir:
8248         raise errors.OpPrereqError("Cluster file storage dir not defined")
8249       joinargs.append(cfg_storagedir)
8250
8251       if self.op.file_storage_dir is not None:
8252         joinargs.append(self.op.file_storage_dir)
8253
8254       joinargs.append(self.op.instance_name)
8255
8256       # pylint: disable-msg=W0142
8257       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8258
8259   def CheckPrereq(self):
8260     """Check prerequisites.
8261
8262     """
8263     self._CalculateFileStorageDir()
8264
8265     if self.op.mode == constants.INSTANCE_IMPORT:
8266       export_info = self._ReadExportInfo()
8267       self._ReadExportParams(export_info)
8268
8269     if (not self.cfg.GetVGName() and
8270         self.op.disk_template not in constants.DTS_NOT_LVM):
8271       raise errors.OpPrereqError("Cluster does not support lvm-based"
8272                                  " instances", errors.ECODE_STATE)
8273
8274     if self.op.hypervisor is None:
8275       self.op.hypervisor = self.cfg.GetHypervisorType()
8276
8277     cluster = self.cfg.GetClusterInfo()
8278     enabled_hvs = cluster.enabled_hypervisors
8279     if self.op.hypervisor not in enabled_hvs:
8280       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8281                                  " cluster (%s)" % (self.op.hypervisor,
8282                                   ",".join(enabled_hvs)),
8283                                  errors.ECODE_STATE)
8284
8285     # Check tag validity
8286     for tag in self.op.tags:
8287       objects.TaggableObject.ValidateTag(tag)
8288
8289     # check hypervisor parameter syntax (locally)
8290     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8291     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8292                                       self.op.hvparams)
8293     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8294     hv_type.CheckParameterSyntax(filled_hvp)
8295     self.hv_full = filled_hvp
8296     # check that we don't specify global parameters on an instance
8297     _CheckGlobalHvParams(self.op.hvparams)
8298
8299     # fill and remember the beparams dict
8300     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8301     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8302
8303     # build os parameters
8304     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8305
8306     # now that hvp/bep are in final format, let's reset to defaults,
8307     # if told to do so
8308     if self.op.identify_defaults:
8309       self._RevertToDefaults(cluster)
8310
8311     # NIC buildup
8312     self.nics = []
8313     for idx, nic in enumerate(self.op.nics):
8314       nic_mode_req = nic.get(constants.INIC_MODE, None)
8315       nic_mode = nic_mode_req
8316       if nic_mode is None:
8317         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8318
8319       # in routed mode, for the first nic, the default ip is 'auto'
8320       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8321         default_ip_mode = constants.VALUE_AUTO
8322       else:
8323         default_ip_mode = constants.VALUE_NONE
8324
8325       # ip validity checks
8326       ip = nic.get(constants.INIC_IP, default_ip_mode)
8327       if ip is None or ip.lower() == constants.VALUE_NONE:
8328         nic_ip = None
8329       elif ip.lower() == constants.VALUE_AUTO:
8330         if not self.op.name_check:
8331           raise errors.OpPrereqError("IP address set to auto but name checks"
8332                                      " have been skipped",
8333                                      errors.ECODE_INVAL)
8334         nic_ip = self.hostname1.ip
8335       else:
8336         if not netutils.IPAddress.IsValid(ip):
8337           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8338                                      errors.ECODE_INVAL)
8339         nic_ip = ip
8340
8341       # TODO: check the ip address for uniqueness
8342       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8343         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8344                                    errors.ECODE_INVAL)
8345
8346       # MAC address verification
8347       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8348       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8349         mac = utils.NormalizeAndValidateMac(mac)
8350
8351         try:
8352           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8353         except errors.ReservationError:
8354           raise errors.OpPrereqError("MAC address %s already in use"
8355                                      " in cluster" % mac,
8356                                      errors.ECODE_NOTUNIQUE)
8357
8358       #  Build nic parameters
8359       link = nic.get(constants.INIC_LINK, None)
8360       nicparams = {}
8361       if nic_mode_req:
8362         nicparams[constants.NIC_MODE] = nic_mode_req
8363       if link:
8364         nicparams[constants.NIC_LINK] = link
8365
8366       check_params = cluster.SimpleFillNIC(nicparams)
8367       objects.NIC.CheckParameterSyntax(check_params)
8368       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8369
8370     # disk checks/pre-build
8371     default_vg = self.cfg.GetVGName()
8372     self.disks = []
8373     for disk in self.op.disks:
8374       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8375       if mode not in constants.DISK_ACCESS_SET:
8376         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8377                                    mode, errors.ECODE_INVAL)
8378       size = disk.get(constants.IDISK_SIZE, None)
8379       if size is None:
8380         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8381       try:
8382         size = int(size)
8383       except (TypeError, ValueError):
8384         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8385                                    errors.ECODE_INVAL)
8386
8387       data_vg = disk.get(constants.IDISK_VG, default_vg)
8388       new_disk = {
8389         constants.IDISK_SIZE: size,
8390         constants.IDISK_MODE: mode,
8391         constants.IDISK_VG: data_vg,
8392         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8393         }
8394       if constants.IDISK_ADOPT in disk:
8395         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8396       self.disks.append(new_disk)
8397
8398     if self.op.mode == constants.INSTANCE_IMPORT:
8399
8400       # Check that the new instance doesn't have less disks than the export
8401       instance_disks = len(self.disks)
8402       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8403       if instance_disks < export_disks:
8404         raise errors.OpPrereqError("Not enough disks to import."
8405                                    " (instance: %d, export: %d)" %
8406                                    (instance_disks, export_disks),
8407                                    errors.ECODE_INVAL)
8408
8409       disk_images = []
8410       for idx in range(export_disks):
8411         option = 'disk%d_dump' % idx
8412         if export_info.has_option(constants.INISECT_INS, option):
8413           # FIXME: are the old os-es, disk sizes, etc. useful?
8414           export_name = export_info.get(constants.INISECT_INS, option)
8415           image = utils.PathJoin(self.op.src_path, export_name)
8416           disk_images.append(image)
8417         else:
8418           disk_images.append(False)
8419
8420       self.src_images = disk_images
8421
8422       old_name = export_info.get(constants.INISECT_INS, 'name')
8423       try:
8424         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8425       except (TypeError, ValueError), err:
8426         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8427                                    " an integer: %s" % str(err),
8428                                    errors.ECODE_STATE)
8429       if self.op.instance_name == old_name:
8430         for idx, nic in enumerate(self.nics):
8431           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8432             nic_mac_ini = 'nic%d_mac' % idx
8433             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8434
8435     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8436
8437     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8438     if self.op.ip_check:
8439       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8440         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8441                                    (self.check_ip, self.op.instance_name),
8442                                    errors.ECODE_NOTUNIQUE)
8443
8444     #### mac address generation
8445     # By generating here the mac address both the allocator and the hooks get
8446     # the real final mac address rather than the 'auto' or 'generate' value.
8447     # There is a race condition between the generation and the instance object
8448     # creation, which means that we know the mac is valid now, but we're not
8449     # sure it will be when we actually add the instance. If things go bad
8450     # adding the instance will abort because of a duplicate mac, and the
8451     # creation job will fail.
8452     for nic in self.nics:
8453       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8454         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8455
8456     #### allocator run
8457
8458     if self.op.iallocator is not None:
8459       self._RunAllocator()
8460
8461     #### node related checks
8462
8463     # check primary node
8464     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8465     assert self.pnode is not None, \
8466       "Cannot retrieve locked node %s" % self.op.pnode
8467     if pnode.offline:
8468       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8469                                  pnode.name, errors.ECODE_STATE)
8470     if pnode.drained:
8471       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8472                                  pnode.name, errors.ECODE_STATE)
8473     if not pnode.vm_capable:
8474       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8475                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8476
8477     self.secondaries = []
8478
8479     # mirror node verification
8480     if self.op.disk_template in constants.DTS_INT_MIRROR:
8481       if self.op.snode == pnode.name:
8482         raise errors.OpPrereqError("The secondary node cannot be the"
8483                                    " primary node", errors.ECODE_INVAL)
8484       _CheckNodeOnline(self, self.op.snode)
8485       _CheckNodeNotDrained(self, self.op.snode)
8486       _CheckNodeVmCapable(self, self.op.snode)
8487       self.secondaries.append(self.op.snode)
8488
8489     nodenames = [pnode.name] + self.secondaries
8490
8491     if not self.adopt_disks:
8492       # Check lv size requirements, if not adopting
8493       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8494       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8495
8496     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8497       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8498                                 disk[constants.IDISK_ADOPT])
8499                      for disk in self.disks])
8500       if len(all_lvs) != len(self.disks):
8501         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8502                                    errors.ECODE_INVAL)
8503       for lv_name in all_lvs:
8504         try:
8505           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8506           # to ReserveLV uses the same syntax
8507           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8508         except errors.ReservationError:
8509           raise errors.OpPrereqError("LV named %s used by another instance" %
8510                                      lv_name, errors.ECODE_NOTUNIQUE)
8511
8512       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8513       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8514
8515       node_lvs = self.rpc.call_lv_list([pnode.name],
8516                                        vg_names.payload.keys())[pnode.name]
8517       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8518       node_lvs = node_lvs.payload
8519
8520       delta = all_lvs.difference(node_lvs.keys())
8521       if delta:
8522         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8523                                    utils.CommaJoin(delta),
8524                                    errors.ECODE_INVAL)
8525       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8526       if online_lvs:
8527         raise errors.OpPrereqError("Online logical volumes found, cannot"
8528                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8529                                    errors.ECODE_STATE)
8530       # update the size of disk based on what is found
8531       for dsk in self.disks:
8532         dsk[constants.IDISK_SIZE] = \
8533           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8534                                         dsk[constants.IDISK_ADOPT])][0]))
8535
8536     elif self.op.disk_template == constants.DT_BLOCK:
8537       # Normalize and de-duplicate device paths
8538       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8539                        for disk in self.disks])
8540       if len(all_disks) != len(self.disks):
8541         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8542                                    errors.ECODE_INVAL)
8543       baddisks = [d for d in all_disks
8544                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8545       if baddisks:
8546         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8547                                    " cannot be adopted" %
8548                                    (", ".join(baddisks),
8549                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8550                                    errors.ECODE_INVAL)
8551
8552       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8553                                             list(all_disks))[pnode.name]
8554       node_disks.Raise("Cannot get block device information from node %s" %
8555                        pnode.name)
8556       node_disks = node_disks.payload
8557       delta = all_disks.difference(node_disks.keys())
8558       if delta:
8559         raise errors.OpPrereqError("Missing block device(s): %s" %
8560                                    utils.CommaJoin(delta),
8561                                    errors.ECODE_INVAL)
8562       for dsk in self.disks:
8563         dsk[constants.IDISK_SIZE] = \
8564           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8565
8566     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8567
8568     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8569     # check OS parameters (remotely)
8570     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8571
8572     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8573
8574     # memory check on primary node
8575     if self.op.start:
8576       _CheckNodeFreeMemory(self, self.pnode.name,
8577                            "creating instance %s" % self.op.instance_name,
8578                            self.be_full[constants.BE_MEMORY],
8579                            self.op.hypervisor)
8580
8581     self.dry_run_result = list(nodenames)
8582
8583   def Exec(self, feedback_fn):
8584     """Create and add the instance to the cluster.
8585
8586     """
8587     instance = self.op.instance_name
8588     pnode_name = self.pnode.name
8589
8590     ht_kind = self.op.hypervisor
8591     if ht_kind in constants.HTS_REQ_PORT:
8592       network_port = self.cfg.AllocatePort()
8593     else:
8594       network_port = None
8595
8596     disks = _GenerateDiskTemplate(self,
8597                                   self.op.disk_template,
8598                                   instance, pnode_name,
8599                                   self.secondaries,
8600                                   self.disks,
8601                                   self.instance_file_storage_dir,
8602                                   self.op.file_driver,
8603                                   0,
8604                                   feedback_fn)
8605
8606     iobj = objects.Instance(name=instance, os=self.op.os_type,
8607                             primary_node=pnode_name,
8608                             nics=self.nics, disks=disks,
8609                             disk_template=self.op.disk_template,
8610                             admin_up=False,
8611                             network_port=network_port,
8612                             beparams=self.op.beparams,
8613                             hvparams=self.op.hvparams,
8614                             hypervisor=self.op.hypervisor,
8615                             osparams=self.op.osparams,
8616                             )
8617
8618     if self.op.tags:
8619       for tag in self.op.tags:
8620         iobj.AddTag(tag)
8621
8622     if self.adopt_disks:
8623       if self.op.disk_template == constants.DT_PLAIN:
8624         # rename LVs to the newly-generated names; we need to construct
8625         # 'fake' LV disks with the old data, plus the new unique_id
8626         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8627         rename_to = []
8628         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8629           rename_to.append(t_dsk.logical_id)
8630           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8631           self.cfg.SetDiskID(t_dsk, pnode_name)
8632         result = self.rpc.call_blockdev_rename(pnode_name,
8633                                                zip(tmp_disks, rename_to))
8634         result.Raise("Failed to rename adoped LVs")
8635     else:
8636       feedback_fn("* creating instance disks...")
8637       try:
8638         _CreateDisks(self, iobj)
8639       except errors.OpExecError:
8640         self.LogWarning("Device creation failed, reverting...")
8641         try:
8642           _RemoveDisks(self, iobj)
8643         finally:
8644           self.cfg.ReleaseDRBDMinors(instance)
8645           raise
8646
8647     feedback_fn("adding instance %s to cluster config" % instance)
8648
8649     self.cfg.AddInstance(iobj, self.proc.GetECId())
8650
8651     # Declare that we don't want to remove the instance lock anymore, as we've
8652     # added the instance to the config
8653     del self.remove_locks[locking.LEVEL_INSTANCE]
8654
8655     if self.op.mode == constants.INSTANCE_IMPORT:
8656       # Release unused nodes
8657       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8658     else:
8659       # Release all nodes
8660       _ReleaseLocks(self, locking.LEVEL_NODE)
8661
8662     disk_abort = False
8663     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8664       feedback_fn("* wiping instance disks...")
8665       try:
8666         _WipeDisks(self, iobj)
8667       except errors.OpExecError, err:
8668         logging.exception("Wiping disks failed")
8669         self.LogWarning("Wiping instance disks failed (%s)", err)
8670         disk_abort = True
8671
8672     if disk_abort:
8673       # Something is already wrong with the disks, don't do anything else
8674       pass
8675     elif self.op.wait_for_sync:
8676       disk_abort = not _WaitForSync(self, iobj)
8677     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8678       # make sure the disks are not degraded (still sync-ing is ok)
8679       time.sleep(15)
8680       feedback_fn("* checking mirrors status")
8681       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8682     else:
8683       disk_abort = False
8684
8685     if disk_abort:
8686       _RemoveDisks(self, iobj)
8687       self.cfg.RemoveInstance(iobj.name)
8688       # Make sure the instance lock gets removed
8689       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8690       raise errors.OpExecError("There are some degraded disks for"
8691                                " this instance")
8692
8693     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8694       if self.op.mode == constants.INSTANCE_CREATE:
8695         if not self.op.no_install:
8696           feedback_fn("* running the instance OS create scripts...")
8697           # FIXME: pass debug option from opcode to backend
8698           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8699                                                  self.op.debug_level)
8700           result.Raise("Could not add os for instance %s"
8701                        " on node %s" % (instance, pnode_name))
8702
8703       elif self.op.mode == constants.INSTANCE_IMPORT:
8704         feedback_fn("* running the instance OS import scripts...")
8705
8706         transfers = []
8707
8708         for idx, image in enumerate(self.src_images):
8709           if not image:
8710             continue
8711
8712           # FIXME: pass debug option from opcode to backend
8713           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8714                                              constants.IEIO_FILE, (image, ),
8715                                              constants.IEIO_SCRIPT,
8716                                              (iobj.disks[idx], idx),
8717                                              None)
8718           transfers.append(dt)
8719
8720         import_result = \
8721           masterd.instance.TransferInstanceData(self, feedback_fn,
8722                                                 self.op.src_node, pnode_name,
8723                                                 self.pnode.secondary_ip,
8724                                                 iobj, transfers)
8725         if not compat.all(import_result):
8726           self.LogWarning("Some disks for instance %s on node %s were not"
8727                           " imported successfully" % (instance, pnode_name))
8728
8729       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8730         feedback_fn("* preparing remote import...")
8731         # The source cluster will stop the instance before attempting to make a
8732         # connection. In some cases stopping an instance can take a long time,
8733         # hence the shutdown timeout is added to the connection timeout.
8734         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8735                            self.op.source_shutdown_timeout)
8736         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8737
8738         assert iobj.primary_node == self.pnode.name
8739         disk_results = \
8740           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8741                                         self.source_x509_ca,
8742                                         self._cds, timeouts)
8743         if not compat.all(disk_results):
8744           # TODO: Should the instance still be started, even if some disks
8745           # failed to import (valid for local imports, too)?
8746           self.LogWarning("Some disks for instance %s on node %s were not"
8747                           " imported successfully" % (instance, pnode_name))
8748
8749         # Run rename script on newly imported instance
8750         assert iobj.name == instance
8751         feedback_fn("Running rename script for %s" % instance)
8752         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8753                                                    self.source_instance_name,
8754                                                    self.op.debug_level)
8755         if result.fail_msg:
8756           self.LogWarning("Failed to run rename script for %s on node"
8757                           " %s: %s" % (instance, pnode_name, result.fail_msg))
8758
8759       else:
8760         # also checked in the prereq part
8761         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8762                                      % self.op.mode)
8763
8764     if self.op.start:
8765       iobj.admin_up = True
8766       self.cfg.Update(iobj, feedback_fn)
8767       logging.info("Starting instance %s on node %s", instance, pnode_name)
8768       feedback_fn("* starting instance...")
8769       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8770       result.Raise("Could not start instance")
8771
8772     return list(iobj.all_nodes)
8773
8774
class LUInstanceConsole(NoHooksLU):
  """Return the console information of an instance.

  This logical unit does not modify anything; its result is the
  dictionary built by L{_GetInstanceConsole}, which describes how the
  console of the instance can be reached.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (locked) instance in the configuration and verifies
    that its primary node is online.

    """
    wanted_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(wanted_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % wanted_name

    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console description of a running instance.

    @param feedback_fn: feedback function (not used by this LU)
    @rtype: dict
    @return: the result of L{_GetInstanceConsole}
    @raise errors.OpExecError: if the primary node does not list the
        instance as running

    """
    inst = self.instance
    pnode = inst.primary_node

    # Ask only the primary node, and only for the instance's hypervisor
    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    pnode_result = all_results[pnode]
    pnode_result.Raise("Can't get node information from %s" % pnode)

    is_running = inst.name in pnode_result.payload
    if not is_running:
      # The reported state depends on what the configuration wants
      if not inst.admin_up:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ERRORDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (inst.name, state))

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    cluster = self.cfg.GetClusterInfo()
    return _GetInstanceConsole(cluster, inst)
8821
8822
def _GetInstanceConsole(cluster, instance):
  """Builds the console description for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object, used to fill in the hypervisor and
      backend parameters of the instance
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the console object returned by the hypervisor, converted
      via its C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled parameters are handed over as separate dictionaries so
  # that the instance object itself is never edited (and the defaults
  # never end up being saved in it).
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  console_obj = hv_impl.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity checks on what the hypervisor gave us
  assert console_obj.instance == instance.name
  assert console_obj.Validate()

  return console_obj.ToDict()
8842
8843
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is carried out by a L{TLReplaceDisks} tasklet;
  this class validates the arguments, declares the needed locks and
  provides the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode / remote node / iallocator combination.

    """
    opcode = self.op
    TLReplaceDisks.CheckArguments(opcode.mode, opcode.remote_node,
                                  opcode.iallocator)

  def ExpandNames(self):
    """Declare the initial locks and create the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    group_level = locking.LEVEL_NODEGROUP

    assert node_level not in self.needed_locks
    assert group_level not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is None:
      # Node locks are computed later on, in DeclareLocks
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[group_level] = []
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [self.op.remote_node]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    # delay_iallocator is always disabled for this LU
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False,
                                   self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Compute the node group and node locks for the given level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Group locks are acquired in shared mode
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment contains the replacement mode, the new and the old
    secondary node, plus the generic per-instance variables.

    """
    inst = self.replacer.instance

    hooks_env = {}
    hooks_env["MODE"] = self.op.mode
    hooks_env["NEW_SECONDARY"] = self.op.remote_node
    hooks_env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    hooks_env.update(_BuildInstanceHookEnvByObject(self, inst))

    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair made of the same node list twice: the master node,
        the primary node of the instance and, if given, the new
        secondary node

    """
    inst = self.replacer.instance

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the node groups of the instance are still the ones
    which were locked, then runs the generic prerequisite check.

    @raise errors.OpExecError: if the node groups of the instance
        differ from the owned node group locks

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if owned_groups:
      current_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      if owned_groups != current_groups:
        details = (self.op.instance_name,
                   utils.CommaJoin(current_groups),
                   utils.CommaJoin(owned_groups))
        raise errors.OpExecError("Node groups used by instance '%s' changed"
                                 " since lock was acquired, current list is %r,"
                                 " used to be '%s'" % details)

    return LogicalUnit.CheckPrereq(self)
8957
8958
8959 class TLReplaceDisks(Tasklet):
8960   """Replaces disks for an instance.
8961
8962   Note: Locking is not within the scope of this class.
8963
8964   """
8965   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8966                disks, delay_iallocator, early_release):
8967     """Initializes this class.
8968
8969     """
8970     Tasklet.__init__(self, lu)
8971
8972     # Parameters
8973     self.instance_name = instance_name
8974     self.mode = mode
8975     self.iallocator_name = iallocator_name
8976     self.remote_node = remote_node
8977     self.disks = disks
8978     self.delay_iallocator = delay_iallocator
8979     self.early_release = early_release
8980
8981     # Runtime data
8982     self.instance = None
8983     self.new_node = None
8984     self.target_node = None
8985     self.other_node = None
8986     self.remote_node_info = None
8987     self.node_secondary_ip = None
8988
8989   @staticmethod
8990   def CheckArguments(mode, remote_node, iallocator):
8991     """Helper function for users of this class.
8992
8993     """
8994     # check for valid parameter combination
8995     if mode == constants.REPLACE_DISK_CHG:
8996       if remote_node is None and iallocator is None:
8997         raise errors.OpPrereqError("When changing the secondary either an"
8998                                    " iallocator script must be used or the"
8999                                    " new node given", errors.ECODE_INVAL)
9000
9001       if remote_node is not None and iallocator is not None:
9002         raise errors.OpPrereqError("Give either the iallocator or the new"
9003                                    " secondary, not both", errors.ECODE_INVAL)
9004
9005     elif remote_node is not None or iallocator is not None:
9006       # Not replacing the secondary
9007       raise errors.OpPrereqError("The iallocator and new node options can"
9008                                  " only be used when changing the"
9009                                  " secondary node", errors.ECODE_INVAL)
9010
9011   @staticmethod
9012   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9013     """Compute a new secondary node using an IAllocator.
9014
9015     """
9016     ial = IAllocator(lu.cfg, lu.rpc,
9017                      mode=constants.IALLOCATOR_MODE_RELOC,
9018                      name=instance_name,
9019                      relocate_from=relocate_from)
9020
9021     ial.Run(iallocator_name)
9022
9023     if not ial.success:
9024       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9025                                  " %s" % (iallocator_name, ial.info),
9026                                  errors.ECODE_NORES)
9027
9028     if len(ial.result) != ial.required_nodes:
9029       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9030                                  " of nodes (%s), required %s" %
9031                                  (iallocator_name,
9032                                   len(ial.result), ial.required_nodes),
9033                                  errors.ECODE_FAULT)
9034
9035     remote_node_name = ial.result[0]
9036
9037     lu.LogInfo("Selected new secondary for instance '%s': %s",
9038                instance_name, remote_node_name)
9039
9040     return remote_node_name
9041
9042   def _FindFaultyDisks(self, node_name):
9043     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9044                                     node_name, True)
9045
9046   def _CheckDisksActivated(self, instance):
9047     """Checks if the instance disks are activated.
9048
9049     @param instance: The instance to check disks
9050     @return: True if they are activated, False otherwise
9051
9052     """
9053     nodes = instance.all_nodes
9054
9055     for idx, dev in enumerate(instance.disks):
9056       for node in nodes:
9057         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9058         self.cfg.SetDiskID(dev, node)
9059
9060         result = self.rpc.call_blockdev_find(node, dev)
9061
9062         if result.offline:
9063           continue
9064         elif result.fail_msg or not result.payload:
9065           return False
9066
9067     return True
9068
9069   def CheckPrereq(self):
9070     """Check prerequisites.
9071
9072     This checks that the instance is in the cluster.
9073
9074     """
9075     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9076     assert instance is not None, \
9077       "Cannot retrieve locked instance %s" % self.instance_name
9078
9079     if instance.disk_template != constants.DT_DRBD8:
9080       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9081                                  " instances", errors.ECODE_INVAL)
9082
9083     if len(instance.secondary_nodes) != 1:
9084       raise errors.OpPrereqError("The instance has a strange layout,"
9085                                  " expected one secondary but found %d" %
9086                                  len(instance.secondary_nodes),
9087                                  errors.ECODE_FAULT)
9088
9089     if not self.delay_iallocator:
9090       self._CheckPrereq2()
9091
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated
    because during node evacuation iallocator was only called with an
    unmodified cluster model, not taking planned changes into account.
    It is run from CheckPrereq unless C{delay_iallocator} is set, in
    which case it is run at the beginning of Exec.

    It determines the new secondary node (if any), computes the target,
    other and new node for the selected mode together with the disks to
    replace, verifies that the involved nodes are online, releases the
    node and node group locks which are not needed anymore and records
    the secondary IP addresses of the touched nodes.

    @raise errors.OpPrereqError: if the requested operation is not
        possible in the current state
    @raise errors.ProgrammerError: if the replacement mode is unknown

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed by the
    # iallocator (relocating away from the current secondary)
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    # The new secondary must differ from both current nodes
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # An explicit disk list is only accepted when replacing on the
    # primary or on the secondary node
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: find out on which node (if any) disks are faulty
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty: no disks to replace, no nodes to check;
        # target_node and other_node keep their initial value
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary (target_node) is deliberately not part of
        # the nodes which have to be online
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Only nodes which were actually assigned a role are kept
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    # (the lookup result is discarded; presumably FindDisk raises for
    # an unknown index - confirm against objects.Instance)
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    # (Exec later compares the keys of this dict with the owned node locks)
    self.node_secondary_ip = \
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
           for node_name in touched_nodes)
9222
9223   def Exec(self, feedback_fn):
9224     """Execute disk replacement.
9225
9226     This dispatches the disk replacement to the appropriate handler.
9227
9228     """
9229     if self.delay_iallocator:
9230       self._CheckPrereq2()
9231
9232     if __debug__:
9233       # Verify owned locks before starting operation
9234       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9235       assert set(owned_locks) == set(self.node_secondary_ip), \
9236           ("Incorrect node locks, owning %s, expected %s" %
9237            (owned_locks, self.node_secondary_ip.keys()))
9238
9239       owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9240       assert list(owned_locks) == [self.instance_name], \
9241           "Instance '%s' not locked" % self.instance_name
9242
9243       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9244           "Should not own any node group lock at this point"
9245
9246     if not self.disks:
9247       feedback_fn("No disks need replacement")
9248       return
9249
9250     feedback_fn("Replacing disk(s) %s for %s" %
9251                 (utils.CommaJoin(self.disks), self.instance.name))
9252
9253     activate_disks = (not self.instance.admin_up)
9254
9255     # Activate the instance disks if we're replacing them on a down instance
9256     if activate_disks:
9257       _StartInstanceDisks(self.lu, self.instance, True)
9258
9259     try:
9260       # Should we replace the secondary node?
9261       if self.new_node is not None:
9262         fn = self._ExecDrbd8Secondary
9263       else:
9264         fn = self._ExecDrbd8DiskOnly
9265
9266       result = fn(feedback_fn)
9267     finally:
9268       # Deactivate the instance disks if we're replacing them on a
9269       # down instance
9270       if activate_disks:
9271         _SafeShutdownInstanceDisks(self.lu, self.instance)
9272
9273     if __debug__:
9274       # Verify owned locks
9275       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9276       nodes = frozenset(self.node_secondary_ip)
9277       assert ((self.early_release and not owned_locks) or
9278               (not self.early_release and not (set(owned_locks) - nodes))), \
9279         ("Not owning the correct locks, early_release=%s, owned=%r,"
9280          " nodes=%r" % (self.early_release, owned_locks, nodes))
9281
9282     return result
9283
9284   def _CheckVolumeGroup(self, nodes):
9285     self.lu.LogInfo("Checking volume groups")
9286
9287     vgname = self.cfg.GetVGName()
9288
9289     # Make sure volume group exists on all involved nodes
9290     results = self.rpc.call_vg_list(nodes)
9291     if not results:
9292       raise errors.OpExecError("Can't list volume groups on the nodes")
9293
9294     for node in nodes:
9295       res = results[node]
9296       res.Raise("Error checking node %s" % node)
9297       if vgname not in res.payload:
9298         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9299                                  (vgname, node))
9300
9301   def _CheckDisksExistence(self, nodes):
9302     # Check disk existence
9303     for idx, dev in enumerate(self.instance.disks):
9304       if idx not in self.disks:
9305         continue
9306
9307       for node in nodes:
9308         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9309         self.cfg.SetDiskID(dev, node)
9310
9311         result = self.rpc.call_blockdev_find(node, dev)
9312
9313         msg = result.fail_msg
9314         if msg or not result.payload:
9315           if not msg:
9316             msg = "disk not found"
9317           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9318                                    (idx, node, msg))
9319
9320   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9321     for idx, dev in enumerate(self.instance.disks):
9322       if idx not in self.disks:
9323         continue
9324
9325       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9326                       (idx, node_name))
9327
9328       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9329                                    ldisk=ldisk):
9330         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9331                                  " replace disks for instance %s" %
9332                                  (node_name, self.instance.name))
9333
9334   def _CreateNewStorage(self, node_name):
9335     iv_names = {}
9336
9337     for idx, dev in enumerate(self.instance.disks):
9338       if idx not in self.disks:
9339         continue
9340
9341       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9342
9343       self.cfg.SetDiskID(dev, node_name)
9344
9345       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9346       names = _GenerateUniqueNames(self.lu, lv_names)
9347
9348       vg_data = dev.children[0].logical_id[0]
9349       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9350                              logical_id=(vg_data, names[0]))
9351       vg_meta = dev.children[1].logical_id[0]
9352       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9353                              logical_id=(vg_meta, names[1]))
9354
9355       new_lvs = [lv_data, lv_meta]
9356       old_lvs = dev.children
9357       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9358
9359       # we pass force_create=True to force the LVM creation
9360       for new_lv in new_lvs:
9361         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9362                         _GetInstanceInfoText(self.instance), False)
9363
9364     return iv_names
9365
9366   def _CheckDevices(self, node_name, iv_names):
9367     for name, (dev, _, _) in iv_names.iteritems():
9368       self.cfg.SetDiskID(dev, node_name)
9369
9370       result = self.rpc.call_blockdev_find(node_name, dev)
9371
9372       msg = result.fail_msg
9373       if msg or not result.payload:
9374         if not msg:
9375           msg = "disk not found"
9376         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9377                                  (name, msg))
9378
9379       if result.payload.is_degraded:
9380         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9381
9382   def _RemoveOldStorage(self, node_name, iv_names):
9383     for name, (_, old_lvs, _) in iv_names.iteritems():
9384       self.lu.LogInfo("Remove logical volumes for %s" % name)
9385
9386       for lv in old_lvs:
9387         self.cfg.SetDiskID(lv, node_name)
9388
9389         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9390         if msg:
9391           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9392                              hint="remove unused LVs manually")
9393
9394   def _ExecDrbd8DiskOnly(self, feedback_fn):
9395     """Replace a disk on the primary or secondary for DRBD 8.
9396
9397     The algorithm for replace is quite complicated:
9398
9399       1. for each disk to be replaced:
9400
9401         1. create new LVs on the target node with unique names
9402         1. detach old LVs from the drbd device
9403         1. rename old LVs to name_replaced.<time_t>
9404         1. rename new LVs to old LVs
9405         1. attach the new LVs (with the old names now) to the drbd device
9406
9407       1. wait for sync across all devices
9408
9409       1. for each modified disk:
9410
9411         1. remove old LVs (which have the name name_replaces.<time_t>)
9412
9413     Failures are not very well handled.
9414
9415     """
9416     steps_total = 6
9417
9418     # Step: check device activation
9419     self.lu.LogStep(1, steps_total, "Check device existence")
9420     self._CheckDisksExistence([self.other_node, self.target_node])
9421     self._CheckVolumeGroup([self.target_node, self.other_node])
9422
9423     # Step: check other node consistency
9424     self.lu.LogStep(2, steps_total, "Check peer consistency")
9425     self._CheckDisksConsistency(self.other_node,
9426                                 self.other_node == self.instance.primary_node,
9427                                 False)
9428
9429     # Step: create new storage
9430     self.lu.LogStep(3, steps_total, "Allocate new storage")
9431     iv_names = self._CreateNewStorage(self.target_node)
9432
9433     # Step: for each lv, detach+rename*2+attach
9434     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9435     for dev, old_lvs, new_lvs in iv_names.itervalues():
9436       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9437
9438       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9439                                                      old_lvs)
9440       result.Raise("Can't detach drbd from local storage on node"
9441                    " %s for device %s" % (self.target_node, dev.iv_name))
9442       #dev.children = []
9443       #cfg.Update(instance)
9444
9445       # ok, we created the new LVs, so now we know we have the needed
9446       # storage; as such, we proceed on the target node to rename
9447       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9448       # using the assumption that logical_id == physical_id (which in
9449       # turn is the unique_id on that node)
9450
9451       # FIXME(iustin): use a better name for the replaced LVs
9452       temp_suffix = int(time.time())
9453       ren_fn = lambda d, suff: (d.physical_id[0],
9454                                 d.physical_id[1] + "_replaced-%s" % suff)
9455
9456       # Build the rename list based on what LVs exist on the node
9457       rename_old_to_new = []
9458       for to_ren in old_lvs:
9459         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9460         if not result.fail_msg and result.payload:
9461           # device exists
9462           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9463
9464       self.lu.LogInfo("Renaming the old LVs on the target node")
9465       result = self.rpc.call_blockdev_rename(self.target_node,
9466                                              rename_old_to_new)
9467       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9468
9469       # Now we rename the new LVs to the old LVs
9470       self.lu.LogInfo("Renaming the new LVs on the target node")
9471       rename_new_to_old = [(new, old.physical_id)
9472                            for old, new in zip(old_lvs, new_lvs)]
9473       result = self.rpc.call_blockdev_rename(self.target_node,
9474                                              rename_new_to_old)
9475       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9476
9477       for old, new in zip(old_lvs, new_lvs):
9478         new.logical_id = old.logical_id
9479         self.cfg.SetDiskID(new, self.target_node)
9480
9481       for disk in old_lvs:
9482         disk.logical_id = ren_fn(disk, temp_suffix)
9483         self.cfg.SetDiskID(disk, self.target_node)
9484
9485       # Now that the new lvs have the old name, we can add them to the device
9486       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9487       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9488                                                   new_lvs)
9489       msg = result.fail_msg
9490       if msg:
9491         for new_lv in new_lvs:
9492           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9493                                                new_lv).fail_msg
9494           if msg2:
9495             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9496                                hint=("cleanup manually the unused logical"
9497                                      "volumes"))
9498         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9499
9500       dev.children = new_lvs
9501
9502       self.cfg.Update(self.instance, feedback_fn)
9503
9504     cstep = 5
9505     if self.early_release:
9506       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9507       cstep += 1
9508       self._RemoveOldStorage(self.target_node, iv_names)
9509       # WARNING: we release both node locks here, do not do other RPCs
9510       # than WaitForSync to the primary node
9511       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9512                     names=[self.target_node, self.other_node])
9513
9514     # Wait for sync
9515     # This can fail as the old devices are degraded and _WaitForSync
9516     # does a combined result over all disks, so we don't check its return value
9517     self.lu.LogStep(cstep, steps_total, "Sync devices")
9518     cstep += 1
9519     _WaitForSync(self.lu, self.instance)
9520
9521     # Check all devices manually
9522     self._CheckDevices(self.instance.primary_node, iv_names)
9523
9524     # Step: remove old storage
9525     if not self.early_release:
9526       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9527       cstep += 1
9528       self._RemoveOldStorage(self.target_node, iv_names)
9529
9530   def _ExecDrbd8Secondary(self, feedback_fn):
9531     """Replace the secondary node for DRBD 8.
9532
9533     The algorithm for replace is quite complicated:
9534       - for all disks of the instance:
9535         - create new LVs on the new node with same names
9536         - shutdown the drbd device on the old secondary
9537         - disconnect the drbd network on the primary
9538         - create the drbd device on the new secondary
9539         - network attach the drbd on the primary, using an artifice:
9540           the drbd code for Attach() will connect to the network if it
9541           finds a device which is connected to the good local disks but
9542           not network enabled
9543       - wait for sync across all devices
9544       - remove all disks from the old secondary
9545
9546     Failures are not very well handled.
9547
9548     """
9549     steps_total = 6
9550
9551     # Step: check device activation
9552     self.lu.LogStep(1, steps_total, "Check device existence")
9553     self._CheckDisksExistence([self.instance.primary_node])
9554     self._CheckVolumeGroup([self.instance.primary_node])
9555
9556     # Step: check other node consistency
9557     self.lu.LogStep(2, steps_total, "Check peer consistency")
9558     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9559
9560     # Step: create new storage
9561     self.lu.LogStep(3, steps_total, "Allocate new storage")
9562     for idx, dev in enumerate(self.instance.disks):
9563       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9564                       (self.new_node, idx))
9565       # we pass force_create=True to force LVM creation
9566       for new_lv in dev.children:
9567         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9568                         _GetInstanceInfoText(self.instance), False)
9569
9570     # Step 4: dbrd minors and drbd setups changes
9571     # after this, we must manually remove the drbd minors on both the
9572     # error and the success paths
9573     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9574     minors = self.cfg.AllocateDRBDMinor([self.new_node
9575                                          for dev in self.instance.disks],
9576                                         self.instance.name)
9577     logging.debug("Allocated minors %r", minors)
9578
9579     iv_names = {}
9580     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9581       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9582                       (self.new_node, idx))
9583       # create new devices on new_node; note that we create two IDs:
9584       # one without port, so the drbd will be activated without
9585       # networking information on the new node at this stage, and one
9586       # with network, for the latter activation in step 4
9587       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9588       if self.instance.primary_node == o_node1:
9589         p_minor = o_minor1
9590       else:
9591         assert self.instance.primary_node == o_node2, "Three-node instance?"
9592         p_minor = o_minor2
9593
9594       new_alone_id = (self.instance.primary_node, self.new_node, None,
9595                       p_minor, new_minor, o_secret)
9596       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9597                     p_minor, new_minor, o_secret)
9598
9599       iv_names[idx] = (dev, dev.children, new_net_id)
9600       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9601                     new_net_id)
9602       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9603                               logical_id=new_alone_id,
9604                               children=dev.children,
9605                               size=dev.size)
9606       try:
9607         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9608                               _GetInstanceInfoText(self.instance), False)
9609       except errors.GenericError:
9610         self.cfg.ReleaseDRBDMinors(self.instance.name)
9611         raise
9612
9613     # We have new devices, shutdown the drbd on the old secondary
9614     for idx, dev in enumerate(self.instance.disks):
9615       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9616       self.cfg.SetDiskID(dev, self.target_node)
9617       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9618       if msg:
9619         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9620                            "node: %s" % (idx, msg),
9621                            hint=("Please cleanup this device manually as"
9622                                  " soon as possible"))
9623
9624     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9625     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9626                                                self.node_secondary_ip,
9627                                                self.instance.disks)\
9628                                               [self.instance.primary_node]
9629
9630     msg = result.fail_msg
9631     if msg:
9632       # detaches didn't succeed (unlikely)
9633       self.cfg.ReleaseDRBDMinors(self.instance.name)
9634       raise errors.OpExecError("Can't detach the disks from the network on"
9635                                " old node: %s" % (msg,))
9636
9637     # if we managed to detach at least one, we update all the disks of
9638     # the instance to point to the new secondary
9639     self.lu.LogInfo("Updating instance configuration")
9640     for dev, _, new_logical_id in iv_names.itervalues():
9641       dev.logical_id = new_logical_id
9642       self.cfg.SetDiskID(dev, self.instance.primary_node)
9643
9644     self.cfg.Update(self.instance, feedback_fn)
9645
9646     # and now perform the drbd attach
9647     self.lu.LogInfo("Attaching primary drbds to new secondary"
9648                     " (standalone => connected)")
9649     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9650                                             self.new_node],
9651                                            self.node_secondary_ip,
9652                                            self.instance.disks,
9653                                            self.instance.name,
9654                                            False)
9655     for to_node, to_result in result.items():
9656       msg = to_result.fail_msg
9657       if msg:
9658         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9659                            to_node, msg,
9660                            hint=("please do a gnt-instance info to see the"
9661                                  " status of disks"))
9662     cstep = 5
9663     if self.early_release:
9664       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9665       cstep += 1
9666       self._RemoveOldStorage(self.target_node, iv_names)
9667       # WARNING: we release all node locks here, do not do other RPCs
9668       # than WaitForSync to the primary node
9669       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9670                     names=[self.instance.primary_node,
9671                            self.target_node,
9672                            self.new_node])
9673
9674     # Wait for sync
9675     # This can fail as the old devices are degraded and _WaitForSync
9676     # does a combined result over all disks, so we don't check its return value
9677     self.lu.LogStep(cstep, steps_total, "Sync devices")
9678     cstep += 1
9679     _WaitForSync(self.lu, self.instance)
9680
9681     # Check all devices manually
9682     self._CheckDevices(self.instance.primary_node, iv_names)
9683
9684     # Step: remove old storage
9685     if not self.early_release:
9686       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9687       self._RemoveOldStorage(self.target_node, iv_names)
9688
9689
9690 class LURepairNodeStorage(NoHooksLU):
9691   """Repairs the volume group on a node.
9692
9693   """
9694   REQ_BGL = False
9695
9696   def CheckArguments(self):
9697     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9698
9699     storage_type = self.op.storage_type
9700
9701     if (constants.SO_FIX_CONSISTENCY not in
9702         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9703       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9704                                  " repaired" % storage_type,
9705                                  errors.ECODE_INVAL)
9706
9707   def ExpandNames(self):
9708     self.needed_locks = {
9709       locking.LEVEL_NODE: [self.op.node_name],
9710       }
9711
9712   def _CheckFaultyDisks(self, instance, node_name):
9713     """Ensure faulty disks abort the opcode or at least warn."""
9714     try:
9715       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9716                                   node_name, True):
9717         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9718                                    " node '%s'" % (instance.name, node_name),
9719                                    errors.ECODE_STATE)
9720     except errors.OpPrereqError, err:
9721       if self.op.ignore_consistency:
9722         self.proc.LogWarning(str(err.args[0]))
9723       else:
9724         raise
9725
9726   def CheckPrereq(self):
9727     """Check prerequisites.
9728
9729     """
9730     # Check whether any instance on this node has faulty disks
9731     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9732       if not inst.admin_up:
9733         continue
9734       check_nodes = set(inst.all_nodes)
9735       check_nodes.discard(self.op.node_name)
9736       for inst_node_name in check_nodes:
9737         self._CheckFaultyDisks(inst, inst_node_name)
9738
9739   def Exec(self, feedback_fn):
9740     feedback_fn("Repairing storage unit '%s' on %s ..." %
9741                 (self.op.name, self.op.node_name))
9742
9743     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9744     result = self.rpc.call_storage_execute(self.op.node_name,
9745                                            self.op.storage_type, st_args,
9746                                            self.op.name,
9747                                            constants.SO_FIX_CONSISTENCY)
9748     result.Raise("Failed to repair storage unit '%s' on %s" %
9749                  (self.op.name, self.op.node_name))
9750
9751
class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Delegate the iallocator/remote node check to the shared helper.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node names and declare the node locks.

    Without a remote node all nodes are locked; otherwise only the
    evacuated nodes and the remote node are.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.needed_locks[locking.LEVEL_NODE] = \
        self.op.nodes + [self.op.remote_node]
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Compute where the secondary instances of the nodes should go.

    Returns an empty list if the nodes hold no secondary instances, a
    list of [instance name, remote node] pairs if a remote node was
    given, and the iallocator's result otherwise.

    """
    to_move = []
    for node_name in self.op.nodes:
      to_move += _GetNodeSecondaryInstances(self.cfg, node_name)
    if not to_move:
      return []

    if self.op.remote_node is None:
      # no explicit target, let the iallocator compute the solution
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        # NOTE(review): an ECODE_* value is passed to OpExecError here;
        # kept as-is, confirm whether an error code is intended
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    pairs = []
    for inst in to_move:
      if inst.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, inst.name),
                                   errors.ECODE_INVAL)
      pairs.append([inst.name, self.op.remote_node])
    return pairs
9797
9798
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock and node locks to be computed later.

    """
    self._ExpandAndLockInstance()
    # the node list starts empty and is replaced in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    hooks_env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env.update(instance_env)
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by the instance's nodes) is
    returned for both elements of the tuple.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template is growable and, except for the
    file-based templates, that the nodes have enough free disk space.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node_list = list(inst.all_nodes)
    for node_name in node_list:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    file_based = (constants.DT_FILE, constants.DT_SHARED_FILE)
    if inst.disk_template not in file_based:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      needed = self.disk.ComputeGrowth(self.op.amount)
      _CheckNodesFreeDiskPerVG(self, node_list, needed)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # Two passes over all nodes: first every grow request is issued in
    # dry-run mode; once we know that (as far as we can test) operations
    # across different nodes will succeed, they are run for real
    for dry_run in (True, False):
      for node in instance.all_nodes:
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_grow(node, disk, self.op.amount,
                                             dry_run)
        result.Raise("Grow request failed to node %s" % node)

        if not dry_run:
          # TODO: Rewrite code to work properly
          # DRBD goes into sync mode for a short amount of time after
          # executing the "resize" command. DRBD 8.x below version 8.0.13
          # contains a bug whereby calling "resize" in sync mode fails.
          # Sleeping for a short amount of time is a work-around.
          time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    if not self.op.wait_for_sync:
      if not instance.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested")
      return

    synced = _WaitForSync(self, instance, disks=[disk])
    if not synced:
      self.proc.LogWarning("Disk sync-ing has not returned a good"
                           " status; please check the instance")
    if not instance.admin_up:
      _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9908
9909
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Decide whether locks are needed and declare them.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if not self.op.instances and self.op.use_locking:
      # Will use acquired locks
      self.wanted_names = None
    else:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    # every lock level is requested with the share flag set
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks, but only when locking is in use.

    """
    if level == locking.LEVEL_NODE and self.op.use_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      # no explicit names were given, use the instance locks we own
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    instances = []
    for name in self.wanted_names:
      instances.append(self.cfg.GetInstanceInfo(name))
    self.wanted_instances = instances

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    None is returned for static queries, when no node is given, when the
    RPC result is flagged as offline or when it carries no payload.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is not None:
      return (bdev.dev_path, bdev.major, bdev.minor,
              bdev.sync_percent, bdev.estimated_time,
              bdev.is_degraded, bdev.ldisk_status)
    return None

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The device's children are handled recursively.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in):
      # it is whichever of the first two logical_id entries is not the
      # primary node
      first_node, second_node = dev.logical_id[0], dev.logical_id[1]
      if first_node == instance.primary_node:
        snode = second_node
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = [self._ComputeDiskStatus(instance, snode, child)
                       for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for instance in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        info = info_result.payload
        # only a payload carrying a "state" key counts as "up"
        if info and "state" in info:
          run_state = "up"
        else:
          run_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disk_data = [self._ComputeDiskStatus(instance, None, device)
                   for device in instance.disks]

      instance_data = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disk_data,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }
      all_data[instance.name] = instance_data

    return all_data
10069
10070
10071 class LUInstanceSetParams(LogicalUnit):
10072   """Modifies an instances's parameters.
10073
10074   """
10075   HPATH = "instance-modify"
10076   HTYPE = constants.HTYPE_INSTANCE
10077   REQ_BGL = False
10078
10079   def CheckArguments(self):
10080     if not (self.op.nics or self.op.disks or self.op.disk_template or
10081             self.op.hvparams or self.op.beparams or self.op.os_name):
10082       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10083
10084     if self.op.hvparams:
10085       _CheckGlobalHvParams(self.op.hvparams)
10086
10087     # Disk validation
10088     disk_addremove = 0
10089     for disk_op, disk_dict in self.op.disks:
10090       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10091       if disk_op == constants.DDM_REMOVE:
10092         disk_addremove += 1
10093         continue
10094       elif disk_op == constants.DDM_ADD:
10095         disk_addremove += 1
10096       else:
10097         if not isinstance(disk_op, int):
10098           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10099         if not isinstance(disk_dict, dict):
10100           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10101           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10102
10103       if disk_op == constants.DDM_ADD:
10104         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10105         if mode not in constants.DISK_ACCESS_SET:
10106           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10107                                      errors.ECODE_INVAL)
10108         size = disk_dict.get(constants.IDISK_SIZE, None)
10109         if size is None:
10110           raise errors.OpPrereqError("Required disk parameter size missing",
10111                                      errors.ECODE_INVAL)
10112         try:
10113           size = int(size)
10114         except (TypeError, ValueError), err:
10115           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10116                                      str(err), errors.ECODE_INVAL)
10117         disk_dict[constants.IDISK_SIZE] = size
10118       else:
10119         # modification of disk
10120         if constants.IDISK_SIZE in disk_dict:
10121           raise errors.OpPrereqError("Disk size change not possible, use"
10122                                      " grow-disk", errors.ECODE_INVAL)
10123
10124     if disk_addremove > 1:
10125       raise errors.OpPrereqError("Only one disk add or remove operation"
10126                                  " supported at a time", errors.ECODE_INVAL)
10127
10128     if self.op.disks and self.op.disk_template is not None:
10129       raise errors.OpPrereqError("Disk template conversion and other disk"
10130                                  " changes not supported at the same time",
10131                                  errors.ECODE_INVAL)
10132
10133     if (self.op.disk_template and
10134         self.op.disk_template in constants.DTS_INT_MIRROR and
10135         self.op.remote_node is None):
10136       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10137                                  " one requires specifying a secondary node",
10138                                  errors.ECODE_INVAL)
10139
10140     # NIC validation
10141     nic_addremove = 0
10142     for nic_op, nic_dict in self.op.nics:
10143       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10144       if nic_op == constants.DDM_REMOVE:
10145         nic_addremove += 1
10146         continue
10147       elif nic_op == constants.DDM_ADD:
10148         nic_addremove += 1
10149       else:
10150         if not isinstance(nic_op, int):
10151           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10152         if not isinstance(nic_dict, dict):
10153           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10154           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10155
10156       # nic_dict should be a dict
10157       nic_ip = nic_dict.get(constants.INIC_IP, None)
10158       if nic_ip is not None:
10159         if nic_ip.lower() == constants.VALUE_NONE:
10160           nic_dict[constants.INIC_IP] = None
10161         else:
10162           if not netutils.IPAddress.IsValid(nic_ip):
10163             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10164                                        errors.ECODE_INVAL)
10165
10166       nic_bridge = nic_dict.get('bridge', None)
10167       nic_link = nic_dict.get(constants.INIC_LINK, None)
10168       if nic_bridge and nic_link:
10169         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10170                                    " at the same time", errors.ECODE_INVAL)
10171       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10172         nic_dict['bridge'] = None
10173       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10174         nic_dict[constants.INIC_LINK] = None
10175
10176       if nic_op == constants.DDM_ADD:
10177         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10178         if nic_mac is None:
10179           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10180
10181       if constants.INIC_MAC in nic_dict:
10182         nic_mac = nic_dict[constants.INIC_MAC]
10183         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10184           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10185
10186         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10187           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10188                                      " modifying an existing nic",
10189                                      errors.ECODE_INVAL)
10190
10191     if nic_addremove > 1:
10192       raise errors.OpPrereqError("Only one NIC add or remove operation"
10193                                  " supported at a time", errors.ECODE_INVAL)
10194
10195   def ExpandNames(self):
10196     self._ExpandAndLockInstance()
10197     self.needed_locks[locking.LEVEL_NODE] = []
10198     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10199
10200   def DeclareLocks(self, level):
10201     if level == locking.LEVEL_NODE:
10202       self._LockInstancesNodes()
10203       if self.op.disk_template and self.op.remote_node:
10204         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10205         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10206
10207   def BuildHooksEnv(self):
10208     """Build hooks env.
10209
10210     This runs on the master, primary and secondaries.
10211
10212     """
10213     args = dict()
10214     if constants.BE_MEMORY in self.be_new:
10215       args['memory'] = self.be_new[constants.BE_MEMORY]
10216     if constants.BE_VCPUS in self.be_new:
10217       args['vcpus'] = self.be_new[constants.BE_VCPUS]
10218     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10219     # information at all.
10220     if self.op.nics:
10221       args['nics'] = []
10222       nic_override = dict(self.op.nics)
10223       for idx, nic in enumerate(self.instance.nics):
10224         if idx in nic_override:
10225           this_nic_override = nic_override[idx]
10226         else:
10227           this_nic_override = {}
10228         if constants.INIC_IP in this_nic_override:
10229           ip = this_nic_override[constants.INIC_IP]
10230         else:
10231           ip = nic.ip
10232         if constants.INIC_MAC in this_nic_override:
10233           mac = this_nic_override[constants.INIC_MAC]
10234         else:
10235           mac = nic.mac
10236         if idx in self.nic_pnew:
10237           nicparams = self.nic_pnew[idx]
10238         else:
10239           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10240         mode = nicparams[constants.NIC_MODE]
10241         link = nicparams[constants.NIC_LINK]
10242         args['nics'].append((ip, mac, mode, link))
10243       if constants.DDM_ADD in nic_override:
10244         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10245         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10246         nicparams = self.nic_pnew[constants.DDM_ADD]
10247         mode = nicparams[constants.NIC_MODE]
10248         link = nicparams[constants.NIC_LINK]
10249         args['nics'].append((ip, mac, mode, link))
10250       elif constants.DDM_REMOVE in nic_override:
10251         del args['nics'][-1]
10252
10253     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10254     if self.op.disk_template:
10255       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10256
10257     return env
10258
10259   def BuildHooksNodes(self):
10260     """Build hooks nodes.
10261
10262     """
10263     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10264     return (nl, nl)
10265
10266   def CheckPrereq(self):
10267     """Check prerequisites.
10268
10269     This only checks the instance list against the existing names.
10270
10271     """
10272     # checking the new params on the primary/secondary nodes
10273
10274     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10275     cluster = self.cluster = self.cfg.GetClusterInfo()
10276     assert self.instance is not None, \
10277       "Cannot retrieve locked instance %s" % self.op.instance_name
10278     pnode = instance.primary_node
10279     nodelist = list(instance.all_nodes)
10280
10281     # OS change
10282     if self.op.os_name and not self.op.force:
10283       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10284                       self.op.force_variant)
10285       instance_os = self.op.os_name
10286     else:
10287       instance_os = instance.os
10288
10289     if self.op.disk_template:
10290       if instance.disk_template == self.op.disk_template:
10291         raise errors.OpPrereqError("Instance already has disk template %s" %
10292                                    instance.disk_template, errors.ECODE_INVAL)
10293
10294       if (instance.disk_template,
10295           self.op.disk_template) not in self._DISK_CONVERSIONS:
10296         raise errors.OpPrereqError("Unsupported disk template conversion from"
10297                                    " %s to %s" % (instance.disk_template,
10298                                                   self.op.disk_template),
10299                                    errors.ECODE_INVAL)
10300       _CheckInstanceDown(self, instance, "cannot change disk template")
10301       if self.op.disk_template in constants.DTS_INT_MIRROR:
10302         if self.op.remote_node == pnode:
10303           raise errors.OpPrereqError("Given new secondary node %s is the same"
10304                                      " as the primary node of the instance" %
10305                                      self.op.remote_node, errors.ECODE_STATE)
10306         _CheckNodeOnline(self, self.op.remote_node)
10307         _CheckNodeNotDrained(self, self.op.remote_node)
10308         # FIXME: here we assume that the old instance type is DT_PLAIN
10309         assert instance.disk_template == constants.DT_PLAIN
10310         disks = [{constants.IDISK_SIZE: d.size,
10311                   constants.IDISK_VG: d.logical_id[0]}
10312                  for d in instance.disks]
10313         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10314         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10315
10316     # hvparams processing
10317     if self.op.hvparams:
10318       hv_type = instance.hypervisor
10319       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10320       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10321       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10322
10323       # local check
10324       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10325       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10326       self.hv_new = hv_new # the new actual values
10327       self.hv_inst = i_hvdict # the new dict (without defaults)
10328     else:
10329       self.hv_new = self.hv_inst = {}
10330
10331     # beparams processing
10332     if self.op.beparams:
10333       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10334                                    use_none=True)
10335       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10336       be_new = cluster.SimpleFillBE(i_bedict)
10337       self.be_new = be_new # the new actual values
10338       self.be_inst = i_bedict # the new dict (without defaults)
10339     else:
10340       self.be_new = self.be_inst = {}
10341     be_old = cluster.FillBE(instance)
10342
10343     # osparams processing
10344     if self.op.osparams:
10345       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10346       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10347       self.os_inst = i_osdict # the new dict (without defaults)
10348     else:
10349       self.os_inst = {}
10350
10351     self.warn = []
10352
10353     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10354         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10355       mem_check_list = [pnode]
10356       if be_new[constants.BE_AUTO_BALANCE]:
10357         # either we changed auto_balance to yes or it was from before
10358         mem_check_list.extend(instance.secondary_nodes)
10359       instance_info = self.rpc.call_instance_info(pnode, instance.name,
10360                                                   instance.hypervisor)
10361       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10362                                          instance.hypervisor)
10363       pninfo = nodeinfo[pnode]
10364       msg = pninfo.fail_msg
10365       if msg:
10366         # Assume the primary node is unreachable and go ahead
10367         self.warn.append("Can't get info from primary node %s: %s" %
10368                          (pnode,  msg))
10369       elif not isinstance(pninfo.payload.get('memory_free', None), int):
10370         self.warn.append("Node data from primary node %s doesn't contain"
10371                          " free memory information" % pnode)
10372       elif instance_info.fail_msg:
10373         self.warn.append("Can't get instance runtime information: %s" %
10374                         instance_info.fail_msg)
10375       else:
10376         if instance_info.payload:
10377           current_mem = int(instance_info.payload['memory'])
10378         else:
10379           # Assume instance not running
10380           # (there is a slight race condition here, but it's not very probable,
10381           # and we have no other way to check)
10382           current_mem = 0
10383         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10384                     pninfo.payload['memory_free'])
10385         if miss_mem > 0:
10386           raise errors.OpPrereqError("This change will prevent the instance"
10387                                      " from starting, due to %d MB of memory"
10388                                      " missing on its primary node" % miss_mem,
10389                                      errors.ECODE_NORES)
10390
10391       if be_new[constants.BE_AUTO_BALANCE]:
10392         for node, nres in nodeinfo.items():
10393           if node not in instance.secondary_nodes:
10394             continue
10395           nres.Raise("Can't get info from secondary node %s" % node,
10396                      prereq=True, ecode=errors.ECODE_STATE)
10397           if not isinstance(nres.payload.get('memory_free', None), int):
10398             raise errors.OpPrereqError("Secondary node %s didn't return free"
10399                                        " memory information" % node,
10400                                        errors.ECODE_STATE)
10401           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10402             raise errors.OpPrereqError("This change will prevent the instance"
10403                                        " from failover to its secondary node"
10404                                        " %s, due to not enough memory" % node,
10405                                        errors.ECODE_STATE)
10406
10407     # NIC processing
10408     self.nic_pnew = {}
10409     self.nic_pinst = {}
10410     for nic_op, nic_dict in self.op.nics:
10411       if nic_op == constants.DDM_REMOVE:
10412         if not instance.nics:
10413           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10414                                      errors.ECODE_INVAL)
10415         continue
10416       if nic_op != constants.DDM_ADD:
10417         # an existing nic
10418         if not instance.nics:
10419           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10420                                      " no NICs" % nic_op,
10421                                      errors.ECODE_INVAL)
10422         if nic_op < 0 or nic_op >= len(instance.nics):
10423           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10424                                      " are 0 to %d" %
10425                                      (nic_op, len(instance.nics) - 1),
10426                                      errors.ECODE_INVAL)
10427         old_nic_params = instance.nics[nic_op].nicparams
10428         old_nic_ip = instance.nics[nic_op].ip
10429       else:
10430         old_nic_params = {}
10431         old_nic_ip = None
10432
10433       update_params_dict = dict([(key, nic_dict[key])
10434                                  for key in constants.NICS_PARAMETERS
10435                                  if key in nic_dict])
10436
10437       if 'bridge' in nic_dict:
10438         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10439
10440       new_nic_params = _GetUpdatedParams(old_nic_params,
10441                                          update_params_dict)
10442       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10443       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10444       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10445       self.nic_pinst[nic_op] = new_nic_params
10446       self.nic_pnew[nic_op] = new_filled_nic_params
10447       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10448
10449       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10450         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10451         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10452         if msg:
10453           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10454           if self.op.force:
10455             self.warn.append(msg)
10456           else:
10457             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10458       if new_nic_mode == constants.NIC_MODE_ROUTED:
10459         if constants.INIC_IP in nic_dict:
10460           nic_ip = nic_dict[constants.INIC_IP]
10461         else:
10462           nic_ip = old_nic_ip
10463         if nic_ip is None:
10464           raise errors.OpPrereqError('Cannot set the nic ip to None'
10465                                      ' on a routed nic', errors.ECODE_INVAL)
10466       if constants.INIC_MAC in nic_dict:
10467         nic_mac = nic_dict[constants.INIC_MAC]
10468         if nic_mac is None:
10469           raise errors.OpPrereqError('Cannot set the nic mac to None',
10470                                      errors.ECODE_INVAL)
10471         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10472           # otherwise generate the mac
10473           nic_dict[constants.INIC_MAC] = \
10474             self.cfg.GenerateMAC(self.proc.GetECId())
10475         else:
10476           # or validate/reserve the current one
10477           try:
10478             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10479           except errors.ReservationError:
10480             raise errors.OpPrereqError("MAC address %s already in use"
10481                                        " in cluster" % nic_mac,
10482                                        errors.ECODE_NOTUNIQUE)
10483
10484     # DISK processing
10485     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10486       raise errors.OpPrereqError("Disk operations not supported for"
10487                                  " diskless instances",
10488                                  errors.ECODE_INVAL)
10489     for disk_op, _ in self.op.disks:
10490       if disk_op == constants.DDM_REMOVE:
10491         if len(instance.disks) == 1:
10492           raise errors.OpPrereqError("Cannot remove the last disk of"
10493                                      " an instance", errors.ECODE_INVAL)
10494         _CheckInstanceDown(self, instance, "cannot remove disks")
10495
10496       if (disk_op == constants.DDM_ADD and
10497           len(instance.disks) >= constants.MAX_DISKS):
10498         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10499                                    " add more" % constants.MAX_DISKS,
10500                                    errors.ECODE_STATE)
10501       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10502         # an existing disk
10503         if disk_op < 0 or disk_op >= len(instance.disks):
10504           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10505                                      " are 0 to %d" %
10506                                      (disk_op, len(instance.disks)),
10507                                      errors.ECODE_INVAL)
10508
10509     return
10510
10511   def _ConvertPlainToDrbd(self, feedback_fn):
10512     """Converts an instance from plain to drbd.
10513
10514     """
10515     feedback_fn("Converting template to drbd")
10516     instance = self.instance
10517     pnode = instance.primary_node
10518     snode = self.op.remote_node
10519
10520     # create a fake disk info for _GenerateDiskTemplate
10521     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10522                   constants.IDISK_VG: d.logical_id[0]}
10523                  for d in instance.disks]
10524     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10525                                       instance.name, pnode, [snode],
10526                                       disk_info, None, None, 0, feedback_fn)
10527     info = _GetInstanceInfoText(instance)
10528     feedback_fn("Creating aditional volumes...")
10529     # first, create the missing data and meta devices
10530     for disk in new_disks:
10531       # unfortunately this is... not too nice
10532       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10533                             info, True)
10534       for child in disk.children:
10535         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10536     # at this stage, all new LVs have been created, we can rename the
10537     # old ones
10538     feedback_fn("Renaming original volumes...")
10539     rename_list = [(o, n.children[0].logical_id)
10540                    for (o, n) in zip(instance.disks, new_disks)]
10541     result = self.rpc.call_blockdev_rename(pnode, rename_list)
10542     result.Raise("Failed to rename original LVs")
10543
10544     feedback_fn("Initializing DRBD devices...")
10545     # all child devices are in place, we can now create the DRBD devices
10546     for disk in new_disks:
10547       for node in [pnode, snode]:
10548         f_create = node == pnode
10549         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10550
10551     # at this point, the instance has been modified
10552     instance.disk_template = constants.DT_DRBD8
10553     instance.disks = new_disks
10554     self.cfg.Update(instance, feedback_fn)
10555
10556     # disks are created, waiting for sync
10557     disk_abort = not _WaitForSync(self, instance,
10558                                   oneshot=not self.op.wait_for_sync)
10559     if disk_abort:
10560       raise errors.OpExecError("There are some degraded disks for"
10561                                " this instance, please cleanup manually")
10562
10563   def _ConvertDrbdToPlain(self, feedback_fn):
10564     """Converts an instance from drbd to plain.
10565
10566     """
10567     instance = self.instance
10568     assert len(instance.secondary_nodes) == 1
10569     pnode = instance.primary_node
10570     snode = instance.secondary_nodes[0]
10571     feedback_fn("Converting template to plain")
10572
10573     old_disks = instance.disks
10574     new_disks = [d.children[0] for d in old_disks]
10575
10576     # copy over size and mode
10577     for parent, child in zip(old_disks, new_disks):
10578       child.size = parent.size
10579       child.mode = parent.mode
10580
10581     # update instance structure
10582     instance.disks = new_disks
10583     instance.disk_template = constants.DT_PLAIN
10584     self.cfg.Update(instance, feedback_fn)
10585
10586     feedback_fn("Removing volumes on the secondary node...")
10587     for disk in old_disks:
10588       self.cfg.SetDiskID(disk, snode)
10589       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10590       if msg:
10591         self.LogWarning("Could not remove block device %s on node %s,"
10592                         " continuing anyway: %s", disk.iv_name, snode, msg)
10593
10594     feedback_fn("Removing unneeded volumes on the primary node...")
10595     for idx, disk in enumerate(old_disks):
10596       meta = disk.children[1]
10597       self.cfg.SetDiskID(meta, pnode)
10598       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10599       if msg:
10600         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10601                         " continuing anyway: %s", idx, pnode, msg)
10602
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: disk changes, disk template
    conversion, NIC changes, hypervisor parameters, backend parameters,
    OS name and OS parameters; the configuration is written at the end.

    @param feedback_fn: function used to report messages to the user
    @rtype: list
    @return: a list of (name, value) pairs describing the changes made

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop, the list length equals the index of the removed disk
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            # removal failures are not fatal
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          # reuse the driver and the directory of the first disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # a creation failure is only logged; the new disk stays in the
            # instance's disk list
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    # disk template conversion
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # the table holds plain functions, hence self is passed explicitly
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # whatever went wrong, call ReleaseDRBDMinors for this instance
        # before propagating the error
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        # the NIC object gets the parameters without defaults (nic_pinst);
        # the filled ones (nic_pnew) are only used for the report below
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing nic: mac and ip are attributes of the NIC
        # object, everything else lives in its nicparams
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    # (note that no entry is added to the result for it)
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # write all the accumulated changes to the configuration
    self.cfg.Update(instance, feedback_fn)

    return result
10736
  # Supported disk template conversions: maps the pair (current template,
  # requested template) to the function doing the work. CheckPrereq rejects
  # pairs missing from this table; Exec calls the function with the LU
  # instance and the feedback function as arguments.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
10741
10742
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request shared node locks.

    Either the nodes named in the opcode or, if none were given, all
    nodes are locked.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; for nodes whose query failed the value is
        C{False} instead

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exports = {}
    for (node, nres) in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload

    return exports
10777
10778
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand and lock the instance."""
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, checks its primary node via
    C{_CheckNodeOnline} and reads the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to report progress to the user
    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}; C{None} for any other mode

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    result = self.rpc.call_x509_cert_create(pnode,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" % result.node)

    (name, cert_pem) = result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # everything handed out is derived from the cluster domain secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_hmac = utils.Sha1Hmac(self._cds, name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (name, key_hmac, salt),
      "x509_ca": signed_ca,
      }
10828
10829
10830 class LUBackupExport(LogicalUnit):
10831   """Export an instance to an image in the cluster.
10832
10833   """
10834   HPATH = "instance-export"
10835   HTYPE = constants.HTYPE_INSTANCE
10836   REQ_BGL = False
10837
10838   def CheckArguments(self):
10839     """Check the arguments.
10840
10841     """
10842     self.x509_key_name = self.op.x509_key_name
10843     self.dest_x509_ca_pem = self.op.destination_x509_ca
10844
10845     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10846       if not self.x509_key_name:
10847         raise errors.OpPrereqError("Missing X509 key name for encryption",
10848                                    errors.ECODE_INVAL)
10849
10850       if not self.dest_x509_ca_pem:
10851         raise errors.OpPrereqError("Missing destination X509 CA",
10852                                    errors.ECODE_INVAL)
10853
10854   def ExpandNames(self):
10855     self._ExpandAndLockInstance()
10856
10857     # Lock all nodes for local exports
10858     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10859       # FIXME: lock only instance primary and destination node
10860       #
10861       # Sad but true, for now we have do lock all nodes, as we don't know where
10862       # the previous export might be, and in this LU we search for it and
10863       # remove it from its current node. In the future we could fix this by:
10864       #  - making a tasklet to search (share-lock all), then create the
10865       #    new one, then one to remove, after
10866       #  - removing the removal operation altogether
10867       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10868
10869   def DeclareLocks(self, level):
10870     """Last minute lock declaration."""
10871     # All nodes are locked anyway, so nothing to do here.
10872
10873   def BuildHooksEnv(self):
10874     """Build hooks env.
10875
10876     This will run on the master, primary node and target node.
10877
10878     """
10879     env = {
10880       "EXPORT_MODE": self.op.mode,
10881       "EXPORT_NODE": self.op.target_node,
10882       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10883       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10884       # TODO: Generic function for boolean env variables
10885       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10886       }
10887
10888     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10889
10890     return env
10891
10892   def BuildHooksNodes(self):
10893     """Build hooks nodes.
10894
10895     """
10896     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10897
10898     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10899       nl.append(self.op.target_node)
10900
10901     return (nl, nl)
10902
10903   def CheckPrereq(self):
10904     """Check prerequisites.
10905
10906     This checks that the instance and node names are valid.
10907
10908     """
10909     instance_name = self.op.instance_name
10910
10911     self.instance = self.cfg.GetInstanceInfo(instance_name)
10912     assert self.instance is not None, \
10913           "Cannot retrieve locked instance %s" % self.op.instance_name
10914     _CheckNodeOnline(self, self.instance.primary_node)
10915
10916     if (self.op.remove_instance and self.instance.admin_up and
10917         not self.op.shutdown):
10918       raise errors.OpPrereqError("Can not remove instance without shutting it"
10919                                  " down before")
10920
10921     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10922       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10923       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10924       assert self.dst_node is not None
10925
10926       _CheckNodeOnline(self, self.dst_node.name)
10927       _CheckNodeNotDrained(self, self.dst_node.name)
10928
10929       self._cds = None
10930       self.dest_disk_info = None
10931       self.dest_x509_ca = None
10932
10933     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10934       self.dst_node = None
10935
10936       if len(self.op.target_node) != len(self.instance.disks):
10937         raise errors.OpPrereqError(("Received destination information for %s"
10938                                     " disks, but instance %s has %s disks") %
10939                                    (len(self.op.target_node), instance_name,
10940                                     len(self.instance.disks)),
10941                                    errors.ECODE_INVAL)
10942
10943       cds = _GetClusterDomainSecret()
10944
10945       # Check X509 key name
10946       try:
10947         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10948       except (TypeError, ValueError), err:
10949         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10950
10951       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10952         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10953                                    errors.ECODE_INVAL)
10954
10955       # Load and verify CA
10956       try:
10957         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10958       except OpenSSL.crypto.Error, err:
10959         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10960                                    (err, ), errors.ECODE_INVAL)
10961
10962       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10963       if errcode is not None:
10964         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10965                                    (msg, ), errors.ECODE_INVAL)
10966
10967       self.dest_x509_ca = cert
10968
10969       # Verify target information
10970       disk_info = []
10971       for idx, disk_data in enumerate(self.op.target_node):
10972         try:
10973           (host, port, magic) = \
10974             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10975         except errors.GenericError, err:
10976           raise errors.OpPrereqError("Target info for disk %s: %s" %
10977                                      (idx, err), errors.ECODE_INVAL)
10978
10979         disk_info.append((host, port, magic))
10980
10981       assert len(disk_info) == len(self.op.target_node)
10982       self.dest_disk_info = disk_info
10983
10984     else:
10985       raise errors.ProgrammerError("Unhandled export mode %r" %
10986                                    self.op.mode)
10987
10988     # instance disk type verification
10989     # TODO: Implement export support for file-based disks
10990     for disk in self.instance.disks:
10991       if disk.dev_type == constants.LD_FILE:
10992         raise errors.OpPrereqError("Export not supported for instances with"
10993                                    " file-based disks", errors.ECODE_INVAL)
10994
10995   def _CleanupExports(self, feedback_fn):
10996     """Removes exports of current instance from all other nodes.
10997
10998     If an instance in a cluster with nodes A..D was exported to node C, its
10999     exports will be removed from the nodes A, B and D.
11000
11001     """
11002     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11003
11004     nodelist = self.cfg.GetNodeList()
11005     nodelist.remove(self.dst_node.name)
11006
11007     # on one-node clusters nodelist will be empty after the removal
11008     # if we proceed the backup would be removed because OpBackupQuery
11009     # substitutes an empty list with the full cluster node list.
11010     iname = self.instance.name
11011     if nodelist:
11012       feedback_fn("Removing old exports for instance %s" % iname)
11013       exportlist = self.rpc.call_export_list(nodelist)
11014       for node in exportlist:
11015         if exportlist[node].fail_msg:
11016           continue
11017         if iname in exportlist[node].payload:
11018           msg = self.rpc.call_export_remove(node, iname).fail_msg
11019           if msg:
11020             self.LogWarning("Could not remove older export for instance %s"
11021                             " on node %s: %s", iname, node, msg)
11022
11023   def Exec(self, feedback_fn):
11024     """Export an instance to an image in the cluster.
11025
11026     """
11027     assert self.op.mode in constants.EXPORT_MODES
11028
11029     instance = self.instance
11030     src_node = instance.primary_node
11031
11032     if self.op.shutdown:
11033       # shutdown the instance, but not the disks
11034       feedback_fn("Shutting down instance %s" % instance.name)
11035       result = self.rpc.call_instance_shutdown(src_node, instance,
11036                                                self.op.shutdown_timeout)
11037       # TODO: Maybe ignore failures if ignore_remove_failures is set
11038       result.Raise("Could not shutdown instance %s on"
11039                    " node %s" % (instance.name, src_node))
11040
11041     # set the disks ID correctly since call_instance_start needs the
11042     # correct drbd minor to create the symlinks
11043     for disk in instance.disks:
11044       self.cfg.SetDiskID(disk, src_node)
11045
11046     activate_disks = (not instance.admin_up)
11047
11048     if activate_disks:
11049       # Activate the instance disks if we'exporting a stopped instance
11050       feedback_fn("Activating disks for %s" % instance.name)
11051       _StartInstanceDisks(self, instance, None)
11052
11053     try:
11054       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11055                                                      instance)
11056
11057       helper.CreateSnapshots()
11058       try:
11059         if (self.op.shutdown and instance.admin_up and
11060             not self.op.remove_instance):
11061           assert not activate_disks
11062           feedback_fn("Starting instance %s" % instance.name)
11063           result = self.rpc.call_instance_start(src_node, instance, None, None)
11064           msg = result.fail_msg
11065           if msg:
11066             feedback_fn("Failed to start instance: %s" % msg)
11067             _ShutdownInstanceDisks(self, instance)
11068             raise errors.OpExecError("Could not start instance: %s" % msg)
11069
11070         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11071           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11072         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11073           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11074           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11075
11076           (key_name, _, _) = self.x509_key_name
11077
11078           dest_ca_pem = \
11079             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11080                                             self.dest_x509_ca)
11081
11082           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11083                                                      key_name, dest_ca_pem,
11084                                                      timeouts)
11085       finally:
11086         helper.Cleanup()
11087
11088       # Check for backwards compatibility
11089       assert len(dresults) == len(instance.disks)
11090       assert compat.all(isinstance(i, bool) for i in dresults), \
11091              "Not all results are boolean: %r" % dresults
11092
11093     finally:
11094       if activate_disks:
11095         feedback_fn("Deactivating disks for %s" % instance.name)
11096         _ShutdownInstanceDisks(self, instance)
11097
11098     if not (compat.all(dresults) and fin_resu):
11099       failures = []
11100       if not fin_resu:
11101         failures.append("export finalization")
11102       if not compat.all(dresults):
11103         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11104                                if not dsk)
11105         failures.append("disk export: disk(s) %s" % fdsk)
11106
11107       raise errors.OpExecError("Export failed, errors in %s" %
11108                                utils.CommaJoin(failures))
11109
11110     # At this point, the export was successful, we can cleanup/finish
11111
11112     # Remove instance if requested
11113     if self.op.remove_instance:
11114       feedback_fn("Removing instance %s" % instance.name)
11115       _RemoveInstance(self, feedback_fn, instance,
11116                       self.op.ignore_remove_failures)
11117
11118     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11119       self._CleanupExports(feedback_fn)
11120
11121     return fin_resu, dresults
11122
11123
class LUBackupRemove(NoHooksLU):
  """Delete the exports of a named instance from the nodes holding them.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all nodes, and none on the instance.

    """
    # RemoveExport only works with all nodes locked. The instance itself is
    # left unlocked: nothing happens to it, and exports can be removed for an
    # instance that no longer exists.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    @param feedback_fn: function used to tell the user that no export was
        found for a name that could not be expanded

    """
    requested_name = self.op.instance_name
    expanded_name = self.cfg.ExpandInstanceName(requested_name)

    # An unknown instance is looked up under the name as passed in, which
    # only works if that name was an FQDN.
    name_unresolved = not expanded_name
    if name_unresolved:
      instance_name = requested_name
    else:
      instance_name = expanded_name

    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exports_by_node = self.rpc.call_export_list(owned_nodes)

    found = False
    for node in exports_by_node:
      node_result = exports_by_node[node]

      query_err = node_result.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue

      if instance_name not in node_result.payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if name_unresolved and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
11169
11170
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generate the UUID of the new group and register its lock.

    """
    # The UUID has to be known this early so the matching lock can be created
    # and acquired. Exec() later tells cfg.AddNodeGroup not to check whether
    # the UUID already exists in the configuration.
    new_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())

    self.group_uuid = new_uuid
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = new_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    Makes sure no node group with the requested name exists yet and, when
    node parameters were passed, enforces their types.

    @raise errors.OpPrereqError: if the group name is already in use

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      # A failed lookup is the good case: the name is still free
      pass
    else:
      details = (self.op.group_name, clashing_uuid)
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" % details,
                                 errors.ECODE_EXISTS)

    ndparams = self.op.ndparams
    if ndparams:
      utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary holding only the new group's name

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, each containing just the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    op = self.op
    new_group = objects.NodeGroup(name=op.group_name,
                                  members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=op.alloc_policy,
                                  ndparams=op.ndparams)

    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)
    # NOTE(review): presumably the new lock was queued for removal in case
    # of failure and is kept now that the group exists -- confirm
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11233
11234
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group and node names and declare the initial locks.

    """
    # Both calls raise errors.OpPrereqError on their own
    target_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    node_names = _GetWantedNodes(self, self.op.nodes)

    self.group_uuid = target_uuid
    self.op.nodes = node_names

    # All affected nodes and groups are to be locked. The node list and the
    # *destination* group are known right away; the "source" groups require
    # node information that is fetched later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([target_uuid]),
      locking.LEVEL_NODE: node_names,
      }

  def DeclareLocks(self, level):
    """Add the nodes' current groups to the group-level locks.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # Neither group nor node locks are held at this point, so the result is
    # only a best guess and gets verified later in the code flow.
    group_locks.update(self.cfg.GetNodeGroupsFromNodes(self.op.nodes))

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the group locks held still match the groups of the nodes
    and checks whether the assignment would split instances across groups.

    @raise errors.OpExecError: if nodes changed groups after the locks were
        declared, if the target group cannot be retrieved, or if instances
        would be newly split and C{force} was not given

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if owned_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(owned_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    target_uuid = self.group_uuid
    nodes_info = self.node_data

    for node_name in self.op.nodes:
      nodes_info[node_name].group = target_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The given node assignments are treated as one atomic operation. An
    instance counts as split when its primary and secondary nodes do not all
    belong to the same group.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that are not split now but would be after
      the change, and a list of instances that are split now and would stay
      split after the change.

    """
    # Assignments to the group a node is already in change nothing
    moved = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        moved[node] = group

    split_after = set()
    split_before = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node] + list(inst.secondary_nodes)

      groups_before = set(node_data[node].group for node in inst_nodes)
      if len(groups_before) > 1:
        split_before.add(inst.name)

      groups_after = set(moved.get(node, node_data[node].group)
                         for node in inst_nodes)
      if len(groups_after) > 1:
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
11370
11371
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolve the requested names into a list of group UUIDs.

    Without requested names, all groups are selected, ordered by name.
    Otherwise each entry may be either a group UUID or a group name.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested entry matches no group

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()

    uuid_by_name = {}
    for group in self._all_groups.values():
      uuid_by_name[group.name] = group.uuid

    if self.names:
      known_uuids = frozenset(self._all_groups.keys())
      unknown = []
      resolved = self.wanted = []

      # Entries are accepted both as UUIDs and as names, UUIDs first
      for name in self.names:
        if name in known_uuids:
          resolved.append(name)
        elif name in uuid_by_name:
          resolved.append(uuid_by_name[name])
        else:
          unknown.append(name)

      if unknown:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(unknown),
                                   errors.ECODE_NOENT)
    else:
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]

  def DeclareLocks(self, lu, level):
    """No additional locks are declared."""
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: L{query.GroupQueryData} built from the wanted groups; the
        group-to-nodes and group-to-instances mappings are None unless the
        corresponding data was requested

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs group->[nodes], which GetAllNodesInfo() provides. GQ_INST
    # needs group->[instances], which has to be derived via instance->node,
    # so nodes are processed even when only instance data was requested.
    if want_nodes or want_instances:
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in lu.cfg.GetAllNodesInfo().values():
        if node.group not in nodes_by_group:
          continue
        nodes_by_group[node.group].append(node.name)
        group_of_node[node.name] = node.group

      if want_instances:
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        for instance in lu.cfg.GetAllInstancesInfo().values():
          pnode = instance.primary_node
          if pnode not in group_of_node:
            continue
          instances_by_group[group_of_node[pnode]].append(instance.name)

    if not want_nodes:
      # Do not pass on node information if it was not requested.
      nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(wanted_groups, nodes_by_group,
                                instances_by_group)
11447
11448
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the group query from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_result = self.gq.OldStyleQuery(self)
    return query_result
11464
11465
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Require at least one modification.

    @raise errors.OpPrereqError: if neither node parameters nor an
        allocation policy were passed

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Look up the group and lock it.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Retrieves the group and, if node parameters were passed, computes and
    type-checks the group's new node parameters.

    @raise errors.OpExecError: if the group cannot be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Type-check the merged dictionary, since that is what Exec stores on
      # the group. Checking the raw request instead would leave the stored
      # values unverified.
      # NOTE(review): presumably the raw request may carry "reset to
      # default" markers that _GetUpdatedParams strips -- confirm
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the group name and the requested allocation
        policy

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, each containing just the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @param feedback_fn: function passed on to the configuration update
    @return: list of (parameter name, new value) pairs; only a change of the
        node parameters is reported

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
11538
11539
11540
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and lock it.

    """
    # This raises errors.OpPrereqError on its own:
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The group has to be empty (i.e. contain no nodes) and must not be the
    only group left in the cluster.

    @raise errors.OpPrereqError: if either condition is violated

    """
    # Verify that the group is empty.
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      sorted_members = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, sorted_members),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary holding only the group's name

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, each containing just the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
        removing the group

    """
    group_uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = group_uuid
11606
11607
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group and lock it.

    """
    # This raises errors.OpPrereqError on its own:
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a group with the new name exists

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # A failed lookup is the good case: the new name is still free
      pass
    else:
      details = (self.op.new_name, clashing_uuid)
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" % details,
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the old and the new group name

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two identical lists: the master node followed by the
        other nodes that belong to the renamed group

    """
    master = self.cfg.GetMasterNode()

    candidates = self.cfg.GetAllNodesInfo()
    # The master always comes first, so it is dropped from the candidates
    candidates.pop(master, None)

    run_nodes = [master]
    for node in candidates.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @param feedback_fn: function passed on to the configuration update
    @return: the new group name
    @raise errors.OpExecError: if the group cannot be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
11675
11676
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    """Expand the target name and declare the lock protecting the target.

    Node and instance names are expanded in place in the opcode. Node group
    names are resolved to the group UUID, which is stored in
    C{self.group_uuid} (C{None} for all other tag kinds). For nodes,
    instances and node groups the corresponding lock is requested; cluster
    tags don't request any lock (see the FIXME below).

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      # Node groups need their lock just like nodes and instances do,
      # otherwise tag changes can race with operations holding the node
      # group lock (e.g. a rename of the very same group)
      self.needed_locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the configuration object the tags belong to and stores it in
    C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not one of cluster,
        node, instance or node group

    """
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
11713
11714
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks of the parent class, all of them in shared mode.

    """
    TagsLU.ExpandNames(self)

    # Reading tags doesn't modify anything, hence every level can be shared
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
11732
11733
11734 class LUTagsSearch(NoHooksLU):
11735   """Searches the tags for a given pattern.
11736
11737   """
11738   REQ_BGL = False
11739
11740   def ExpandNames(self):
11741     self.needed_locks = {}
11742
11743   def CheckPrereq(self):
11744     """Check prerequisites.
11745
11746     This checks the pattern passed for validity by compiling it.
11747
11748     """
11749     try:
11750       self.re = re.compile(self.op.pattern)
11751     except re.error, err:
11752       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11753                                  (self.op.pattern, err), errors.ECODE_INVAL)
11754
11755   def Exec(self, feedback_fn):
11756     """Returns the tag list.
11757
11758     """
11759     cfg = self.cfg
11760     tgts = [("/cluster", cfg.GetClusterInfo())]
11761     ilist = cfg.GetAllInstancesInfo().values()
11762     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11763     nlist = cfg.GetAllNodesInfo().values()
11764     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11765     tgts.extend(("/nodegroup/%s" % n.name, n)
11766                 for n in cfg.GetAllNodeGroupsInfo().values())
11767     results = []
11768     for path, target in tgts:
11769       for tag in target.GetTags():
11770         if self.re.search(tag):
11771           results.append((path, tag))
11772     return results
11773
11774
11775 class LUTagsSet(TagsLU):
11776   """Sets a tag on a given object.
11777
11778   """
11779   REQ_BGL = False
11780
11781   def CheckPrereq(self):
11782     """Check prerequisites.
11783
11784     This checks the type and length of the tag name and value.
11785
11786     """
11787     TagsLU.CheckPrereq(self)
11788     for tag in self.op.tags:
11789       objects.TaggableObject.ValidateTag(tag)
11790
11791   def Exec(self, feedback_fn):
11792     """Sets the tag.
11793
11794     """
11795     try:
11796       for tag in self.op.tags:
11797         self.target.AddTag(tag)
11798     except errors.TagError, err:
11799       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11800     self.cfg.Update(self.target, feedback_fn)
11801
11802
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the tags to be removed and makes sure all of them are
    currently set on the target object.

    @raise errors.OpPrereqError: if at least one tag is not set on the
        target

    """
    TagsLU.CheckPrereq(self)

    for old_tag in self.op.tags:
      objects.TaggableObject.ValidateTag(old_tag)

    # Everything requested for removal which the target doesn't carry
    missing = frozenset(self.op.tags) - self.target.GetTags()
    if not missing:
      return

    quoted = ["'%s'" % name for name in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    The modified object is written back to the configuration afterwards.

    """
    target = self.target

    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)

    self.cfg.Update(target, feedback_fn)
11835
11836
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If a node list was given it is expanded, stored back in the opcode and
    locked at node level; otherwise no locks are requested.

    """
    locks = {}
    self.needed_locks = locks

    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.op.on_nodes = nodes
      locks[locking.LEVEL_NODE] = nodes

  def _TestDelay(self):
    """Do the actual sleep.

    Sleeps on the master first (if requested) and then on the nodes (if any
    were given).

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node, nres) in node_results.items():
        nres.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero results in exactly one delay without any
    iteration message being logged.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last))
      self._TestDelay()
11883
11884
11885 class LUTestJqueue(NoHooksLU):
11886   """Utility LU to test some aspects of the job queue.
11887
11888   """
11889   REQ_BGL = False
11890
11891   # Must be lower than default timeout for WaitForJobChange to see whether it
11892   # notices changed jobs
11893   _CLIENT_CONNECT_TIMEOUT = 20.0
11894   _CLIENT_CONFIRM_TIMEOUT = 60.0
11895
11896   @classmethod
11897   def _NotifyUsingSocket(cls, cb, errcls):
11898     """Opens a Unix socket and waits for another program to connect.
11899
11900     @type cb: callable
11901     @param cb: Callback to send socket name to client
11902     @type errcls: class
11903     @param errcls: Exception class to use for errors
11904
11905     """
11906     # Using a temporary directory as there's no easy way to create temporary
11907     # sockets without writing a custom loop around tempfile.mktemp and
11908     # socket.bind
11909     tmpdir = tempfile.mkdtemp()
11910     try:
11911       tmpsock = utils.PathJoin(tmpdir, "sock")
11912
11913       logging.debug("Creating temporary socket at %s", tmpsock)
11914       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11915       try:
11916         sock.bind(tmpsock)
11917         sock.listen(1)
11918
11919         # Send details to client
11920         cb(tmpsock)
11921
11922         # Wait for client to connect before continuing
11923         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11924         try:
11925           (conn, _) = sock.accept()
11926         except socket.error, err:
11927           raise errcls("Client didn't connect in time (%s)" % err)
11928       finally:
11929         sock.close()
11930     finally:
11931       # Remove as soon as client is connected
11932       shutil.rmtree(tmpdir)
11933
11934     # Wait for client to close
11935     try:
11936       try:
11937         # pylint: disable-msg=E1101
11938         # Instance of '_socketobject' has no ... member
11939         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11940         conn.recv(1)
11941       except socket.error, err:
11942         raise errcls("Client failed to confirm notification (%s)" % err)
11943     finally:
11944       conn.close()
11945
11946   def _SendNotification(self, test, arg, sockname):
11947     """Sends a notification to the client.
11948
11949     @type test: string
11950     @param test: Test name
11951     @param arg: Test argument (depends on test)
11952     @type sockname: string
11953     @param sockname: Socket path
11954
11955     """
11956     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11957
11958   def _Notify(self, prereq, test, arg):
11959     """Notifies the client of a test.
11960
11961     @type prereq: bool
11962     @param prereq: Whether this is a prereq-phase test
11963     @type test: string
11964     @param test: Test name
11965     @param arg: Test argument (depends on test)
11966
11967     """
11968     if prereq:
11969       errcls = errors.OpPrereqError
11970     else:
11971       errcls = errors.OpExecError
11972
11973     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11974                                                   test, arg),
11975                                    errcls)
11976
11977   def CheckArguments(self):
11978     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11979     self.expandnames_calls = 0
11980
11981   def ExpandNames(self):
11982     checkargs_calls = getattr(self, "checkargs_calls", 0)
11983     if checkargs_calls < 1:
11984       raise errors.ProgrammerError("CheckArguments was not called")
11985
11986     self.expandnames_calls += 1
11987
11988     if self.op.notify_waitlock:
11989       self._Notify(True, constants.JQT_EXPANDNAMES, None)
11990
11991     self.LogInfo("Expanding names")
11992
11993     # Get lock on master node (just to get a lock, not for a particular reason)
11994     self.needed_locks = {
11995       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11996       }
11997
11998   def Exec(self, feedback_fn):
11999     if self.expandnames_calls < 1:
12000       raise errors.ProgrammerError("ExpandNames was not called")
12001
12002     if self.op.notify_exec:
12003       self._Notify(False, constants.JQT_EXEC, None)
12004
12005     self.LogInfo("Executing")
12006
12007     if self.op.log_messages:
12008       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12009       for idx, msg in enumerate(self.op.log_messages):
12010         self.LogInfo("Sending log message %s", idx + 1)
12011         feedback_fn(constants.JQT_MSGPREFIX + msg)
12012         # Report how many test messages have been sent
12013         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12014
12015     if self.op.fail:
12016       raise errors.OpExecError("Opcode failure was requested")
12017
12018     return True
12019
12020
12021 class IAllocator(object):
12022   """IAllocator framework.
12023
12024   An IAllocator instance has three sets of attributes:
12025     - cfg that is needed to query the cluster
12026     - input data (all members of the _KEYS class attribute are required)
12027     - four buffer attributes (in|out_data|text), that represent the
12028       input (to the external script) in text and data structure format,
12029       and the output from it, again in two formats
12030     - the result variables from the script (success, info, nodes) for
12031       easy usage
12032
12033   """
12034   # pylint: disable-msg=R0902
12035   # lots of instance attributes
12036
12037   def __init__(self, cfg, rpc, mode, **kwargs):
12038     self.cfg = cfg
12039     self.rpc = rpc
12040     # init buffer variables
12041     self.in_text = self.out_text = self.in_data = self.out_data = None
12042     # init all input fields so that pylint is happy
12043     self.mode = mode
12044     self.memory = self.disks = self.disk_template = None
12045     self.os = self.tags = self.nics = self.vcpus = None
12046     self.hypervisor = None
12047     self.relocate_from = None
12048     self.name = None
12049     self.evac_nodes = None
12050     self.instances = None
12051     self.evac_mode = None
12052     self.target_groups = []
12053     # computed fields
12054     self.required_nodes = None
12055     # init result fields
12056     self.success = self.info = self.result = None
12057
12058     try:
12059       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12060     except KeyError:
12061       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12062                                    " IAllocator" % self.mode)
12063
12064     keyset = [n for (n, _) in keydata]
12065
12066     for key in kwargs:
12067       if key not in keyset:
12068         raise errors.ProgrammerError("Invalid input parameter '%s' to"
12069                                      " IAllocator" % key)
12070       setattr(self, key, kwargs[key])
12071
12072     for key in keyset:
12073       if key not in kwargs:
12074         raise errors.ProgrammerError("Missing input parameter '%s' to"
12075                                      " IAllocator" % key)
12076     self._BuildInputData(compat.partial(fn, self), keydata)
12077
12078   def _ComputeClusterData(self):
12079     """Compute the generic allocator input data.
12080
12081     This is the data that is independent of the actual operation.
12082
12083     """
12084     cfg = self.cfg
12085     cluster_info = cfg.GetClusterInfo()
12086     # cluster data
12087     data = {
12088       "version": constants.IALLOCATOR_VERSION,
12089       "cluster_name": cfg.GetClusterName(),
12090       "cluster_tags": list(cluster_info.GetTags()),
12091       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12092       # we don't have job IDs
12093       }
12094     ninfo = cfg.GetAllNodesInfo()
12095     iinfo = cfg.GetAllInstancesInfo().values()
12096     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12097
12098     # node data
12099     node_list = [n.name for n in ninfo.values() if n.vm_capable]
12100
12101     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12102       hypervisor_name = self.hypervisor
12103     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12104       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12105     else:
12106       hypervisor_name = cluster_info.enabled_hypervisors[0]
12107
12108     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12109                                         hypervisor_name)
12110     node_iinfo = \
12111       self.rpc.call_all_instances_info(node_list,
12112                                        cluster_info.enabled_hypervisors)
12113
12114     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12115
12116     config_ndata = self._ComputeBasicNodeData(ninfo)
12117     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12118                                                  i_list, config_ndata)
12119     assert len(data["nodes"]) == len(ninfo), \
12120         "Incomplete node data computed"
12121
12122     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12123
12124     self.in_data = data
12125
12126   @staticmethod
12127   def _ComputeNodeGroupData(cfg):
12128     """Compute node groups data.
12129
12130     """
12131     ng = dict((guuid, {
12132       "name": gdata.name,
12133       "alloc_policy": gdata.alloc_policy,
12134       })
12135       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12136
12137     return ng
12138
12139   @staticmethod
12140   def _ComputeBasicNodeData(node_cfg):
12141     """Compute global node data.
12142
12143     @rtype: dict
12144     @returns: a dict of name: (node dict, node config)
12145
12146     """
12147     # fill in static (config-based) values
12148     node_results = dict((ninfo.name, {
12149       "tags": list(ninfo.GetTags()),
12150       "primary_ip": ninfo.primary_ip,
12151       "secondary_ip": ninfo.secondary_ip,
12152       "offline": ninfo.offline,
12153       "drained": ninfo.drained,
12154       "master_candidate": ninfo.master_candidate,
12155       "group": ninfo.group,
12156       "master_capable": ninfo.master_capable,
12157       "vm_capable": ninfo.vm_capable,
12158       })
12159       for ninfo in node_cfg.values())
12160
12161     return node_results
12162
12163   @staticmethod
12164   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12165                               node_results):
12166     """Compute global node data.
12167
12168     @param node_results: the basic node structures as filled from the config
12169
12170     """
12171     # make a copy of the current dict
12172     node_results = dict(node_results)
12173     for nname, nresult in node_data.items():
12174       assert nname in node_results, "Missing basic data for node %s" % nname
12175       ninfo = node_cfg[nname]
12176
12177       if not (ninfo.offline or ninfo.drained):
12178         nresult.Raise("Can't get data for node %s" % nname)
12179         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12180                                 nname)
12181         remote_info = nresult.payload
12182
12183         for attr in ['memory_total', 'memory_free', 'memory_dom0',
12184                      'vg_size', 'vg_free', 'cpu_total']:
12185           if attr not in remote_info:
12186             raise errors.OpExecError("Node '%s' didn't return attribute"
12187                                      " '%s'" % (nname, attr))
12188           if not isinstance(remote_info[attr], int):
12189             raise errors.OpExecError("Node '%s' returned invalid value"
12190                                      " for '%s': %s" %
12191                                      (nname, attr, remote_info[attr]))
12192         # compute memory used by primary instances
12193         i_p_mem = i_p_up_mem = 0
12194         for iinfo, beinfo in i_list:
12195           if iinfo.primary_node == nname:
12196             i_p_mem += beinfo[constants.BE_MEMORY]
12197             if iinfo.name not in node_iinfo[nname].payload:
12198               i_used_mem = 0
12199             else:
12200               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12201             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12202             remote_info['memory_free'] -= max(0, i_mem_diff)
12203
12204             if iinfo.admin_up:
12205               i_p_up_mem += beinfo[constants.BE_MEMORY]
12206
12207         # compute memory used by instances
12208         pnr_dyn = {
12209           "total_memory": remote_info['memory_total'],
12210           "reserved_memory": remote_info['memory_dom0'],
12211           "free_memory": remote_info['memory_free'],
12212           "total_disk": remote_info['vg_size'],
12213           "free_disk": remote_info['vg_free'],
12214           "total_cpus": remote_info['cpu_total'],
12215           "i_pri_memory": i_p_mem,
12216           "i_pri_up_memory": i_p_up_mem,
12217           }
12218         pnr_dyn.update(node_results[nname])
12219         node_results[nname] = pnr_dyn
12220
12221     return node_results
12222
12223   @staticmethod
12224   def _ComputeInstanceData(cluster_info, i_list):
12225     """Compute global instance data.
12226
12227     """
12228     instance_data = {}
12229     for iinfo, beinfo in i_list:
12230       nic_data = []
12231       for nic in iinfo.nics:
12232         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12233         nic_dict = {
12234           "mac": nic.mac,
12235           "ip": nic.ip,
12236           "mode": filled_params[constants.NIC_MODE],
12237           "link": filled_params[constants.NIC_LINK],
12238           }
12239         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12240           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12241         nic_data.append(nic_dict)
12242       pir = {
12243         "tags": list(iinfo.GetTags()),
12244         "admin_up": iinfo.admin_up,
12245         "vcpus": beinfo[constants.BE_VCPUS],
12246         "memory": beinfo[constants.BE_MEMORY],
12247         "os": iinfo.os,
12248         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12249         "nics": nic_data,
12250         "disks": [{constants.IDISK_SIZE: dsk.size,
12251                    constants.IDISK_MODE: dsk.mode}
12252                   for dsk in iinfo.disks],
12253         "disk_template": iinfo.disk_template,
12254         "hypervisor": iinfo.hypervisor,
12255         }
12256       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12257                                                  pir["disks"])
12258       instance_data[iinfo.name] = pir
12259
12260     return instance_data
12261
12262   def _AddNewInstance(self):
12263     """Add new instance data to allocator structure.
12264
12265     This in combination with _AllocatorGetClusterData will create the
12266     correct structure needed as input for the allocator.
12267
12268     The checks for the completeness of the opcode must have already been
12269     done.
12270
12271     """
12272     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12273
12274     if self.disk_template in constants.DTS_INT_MIRROR:
12275       self.required_nodes = 2
12276     else:
12277       self.required_nodes = 1
12278
12279     request = {
12280       "name": self.name,
12281       "disk_template": self.disk_template,
12282       "tags": self.tags,
12283       "os": self.os,
12284       "vcpus": self.vcpus,
12285       "memory": self.memory,
12286       "disks": self.disks,
12287       "disk_space_total": disk_space,
12288       "nics": self.nics,
12289       "required_nodes": self.required_nodes,
12290       "hypervisor": self.hypervisor,
12291       }
12292
12293     return request
12294
12295   def _AddRelocateInstance(self):
12296     """Add relocate instance data to allocator structure.
12297
12298     This in combination with _IAllocatorGetClusterData will create the
12299     correct structure needed as input for the allocator.
12300
12301     The checks for the completeness of the opcode must have already been
12302     done.
12303
12304     """
12305     instance = self.cfg.GetInstanceInfo(self.name)
12306     if instance is None:
12307       raise errors.ProgrammerError("Unknown instance '%s' passed to"
12308                                    " IAllocator" % self.name)
12309
12310     if instance.disk_template not in constants.DTS_MIRRORED:
12311       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12312                                  errors.ECODE_INVAL)
12313
12314     if instance.disk_template in constants.DTS_INT_MIRROR and \
12315         len(instance.secondary_nodes) != 1:
12316       raise errors.OpPrereqError("Instance has not exactly one secondary node",
12317                                  errors.ECODE_STATE)
12318
12319     self.required_nodes = 1
12320     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12321     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12322
12323     request = {
12324       "name": self.name,
12325       "disk_space_total": disk_space,
12326       "required_nodes": self.required_nodes,
12327       "relocate_from": self.relocate_from,
12328       }
12329     return request
12330
12331   def _AddEvacuateNodes(self):
12332     """Add evacuate nodes data to allocator structure.
12333
12334     """
12335     request = {
12336       "evac_nodes": self.evac_nodes
12337       }
12338     return request
12339
12340   def _AddNodeEvacuate(self):
12341     """Get data for node-evacuate requests.
12342
12343     """
12344     return {
12345       "instances": self.instances,
12346       "evac_mode": self.evac_mode,
12347       }
12348
12349   def _AddChangeGroup(self):
12350     """Get data for node-evacuate requests.
12351
12352     """
12353     return {
12354       "instances": self.instances,
12355       "target_groups": self.target_groups,
12356       }
12357
12358   def _BuildInputData(self, fn, keydata):
12359     """Build input data structures.
12360
12361     """
12362     self._ComputeClusterData()
12363
12364     request = fn()
12365     request["type"] = self.mode
12366     for keyname, keytype in keydata:
12367       if keyname not in request:
12368         raise errors.ProgrammerError("Request parameter %s is missing" %
12369                                      keyname)
12370       val = request[keyname]
12371       if not keytype(val):
12372         raise errors.ProgrammerError("Request parameter %s doesn't pass"
12373                                      " validation, value %s, expected"
12374                                      " type %s" % (keyname, val, keytype))
12375     self.in_data["request"] = request
12376
12377     self.in_text = serializer.Dump(self.in_data)
12378
12379   _STRING_LIST = ht.TListOf(ht.TString)
12380   _JOBSET_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12381      # pylint: disable-msg=E1101
12382      # Class '...' has no 'OP_ID' member
12383      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12384                           opcodes.OpInstanceMigrate.OP_ID,
12385                           opcodes.OpInstanceReplaceDisks.OP_ID])
12386      })))
12387   _MODE_DATA = {
12388     constants.IALLOCATOR_MODE_ALLOC:
12389       (_AddNewInstance,
12390        [
12391         ("name", ht.TString),
12392         ("memory", ht.TInt),
12393         ("disks", ht.TListOf(ht.TDict)),
12394         ("disk_template", ht.TString),
12395         ("os", ht.TString),
12396         ("tags", _STRING_LIST),
12397         ("nics", ht.TListOf(ht.TDict)),
12398         ("vcpus", ht.TInt),
12399         ("hypervisor", ht.TString),
12400         ], ht.TList),
12401     constants.IALLOCATOR_MODE_RELOC:
12402       (_AddRelocateInstance,
12403        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12404        ht.TList),
12405     constants.IALLOCATOR_MODE_MEVAC:
12406       (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12407        ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12408      constants.IALLOCATOR_MODE_NODE_EVAC:
12409       (_AddNodeEvacuate, [
12410         ("instances", _STRING_LIST),
12411         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12412         ], _JOBSET_LIST),
12413      constants.IALLOCATOR_MODE_CHG_GROUP:
12414       (_AddChangeGroup, [
12415         ("instances", _STRING_LIST),
12416         ("target_groups", _STRING_LIST),
12417         ], _JOBSET_LIST),
12418     }
12419
12420   def Run(self, name, validate=True, call_fn=None):
12421     """Run an instance allocator and return the results.
12422
12423     """
12424     if call_fn is None:
12425       call_fn = self.rpc.call_iallocator_runner
12426
12427     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12428     result.Raise("Failure while running the iallocator script")
12429
12430     self.out_text = result.payload
12431     if validate:
12432       self._ValidateResult()
12433
12434   def _ValidateResult(self):
12435     """Process the allocator results.
12436
12437     This will process and if successful save the result in
12438     self.out_data and the other parameters.
12439
12440     """
12441     try:
12442       rdict = serializer.Load(self.out_text)
12443     except Exception, err:
12444       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12445
12446     if not isinstance(rdict, dict):
12447       raise errors.OpExecError("Can't parse iallocator results: not a dict")
12448
12449     # TODO: remove backwards compatiblity in later versions
12450     if "nodes" in rdict and "result" not in rdict:
12451       rdict["result"] = rdict["nodes"]
12452       del rdict["nodes"]
12453
12454     for key in "success", "info", "result":
12455       if key not in rdict:
12456         raise errors.OpExecError("Can't parse iallocator results:"
12457                                  " missing key '%s'" % key)
12458       setattr(self, key, rdict[key])
12459
12460     if not self._result_check(self.result):
12461       raise errors.OpExecError("Iallocator returned invalid result,"
12462                                " expected %s, got %s" %
12463                                (self._result_check, self.result),
12464                                errors.ECODE_INVAL)
12465
12466     if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12467                      constants.IALLOCATOR_MODE_MEVAC):
12468       node2group = dict((name, ndata["group"])
12469                         for (name, ndata) in self.in_data["nodes"].items())
12470
12471       fn = compat.partial(self._NodesToGroups, node2group,
12472                           self.in_data["nodegroups"])
12473
12474       if self.mode == constants.IALLOCATOR_MODE_RELOC:
12475         assert self.relocate_from is not None
12476         assert self.required_nodes == 1
12477
12478         request_groups = fn(self.relocate_from)
12479         result_groups = fn(rdict["result"])
12480
12481         if result_groups != request_groups:
12482           raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12483                                    " differ from original groups (%s)" %
12484                                    (utils.CommaJoin(result_groups),
12485                                     utils.CommaJoin(request_groups)))
12486       elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12487         request_groups = fn(self.evac_nodes)
12488         for (instance_name, secnode) in self.result:
12489           result_groups = fn([secnode])
12490           if result_groups != request_groups:
12491             raise errors.OpExecError("Iallocator returned new secondary node"
12492                                      " '%s' (group '%s') for instance '%s'"
12493                                      " which is not in original group '%s'" %
12494                                      (secnode, utils.CommaJoin(result_groups),
12495                                       instance_name,
12496                                       utils.CommaJoin(request_groups)))
12497       else:
12498         raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12499
12500     self.out_data = rdict
12501
12502   @staticmethod
12503   def _NodesToGroups(node2group, groups, nodes):
12504     """Returns a list of unique group names for a list of nodes.
12505
12506     @type node2group: dict
12507     @param node2group: Map from node name to group UUID
12508     @type groups: dict
12509     @param groups: Group information
12510     @type nodes: list
12511     @param nodes: Node names
12512
12513     """
12514     result = set()
12515
12516     for node in nodes:
12517       try:
12518         group_uuid = node2group[node]
12519       except KeyError:
12520         # Ignore unknown node
12521         pass
12522       else:
12523         try:
12524           group = groups[group_uuid]
12525         except KeyError:
12526           # Can't find group, let's use UUID
12527           group_name = group_uuid
12528         else:
12529           group_name = group["name"]
12530
12531         result.add(group_name)
12532
12533     return sorted(result)
12534
12535
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds an L{IAllocator} request from the opcode parameters. With
  direction C{IALLOCATOR_DIR_IN} it only returns the request's input text;
  otherwise it runs the named allocator (without validating its result) and
  returns the output text.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    @raise errors.OpPrereqError: if a parameter is missing or invalid for
      the requested mode or direction

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # An allocation test must use a name not matching an existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Each disk must be a dict with an integer size and a valid access mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # Relocation starts from the instance's current secondary nodes
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # Only the "out" direction actually runs an allocator and needs its name
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: Feedback function (not used by this LU)
    @return: the allocator input text for direction C{IALLOCATOR_DIR_IN},
      otherwise the output text of the allocator run
    @raise errors.ProgrammerError: if the mode is not handled here, which
      L{CheckPrereq} is expected to have rejected already

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # The mode must be interpolated into the message; passing it as a
      # separate exception argument would leave a literal "%s" in the text
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # The raw allocator output is returned as-is, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
12646
12647
#: Maps each query resource in L{constants.QR_VIA_OP} to its implementation
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# The table must cover exactly the resources listed in QR_VIA_OP
assert frozenset(_QUERY_IMPL) == constants.QR_VIA_OP
12657
12658
def _GetQueryImplementation(name):
  """Looks up the implementation registered for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the matching entry of L{_QUERY_IMPL}
  @raise errors.OpPrereqError: if C{name} has no entry in L{_QUERY_IMPL}

  """
  try:
    impl = _QUERY_IMPL[name]
  except KeyError:
    # Report unknown resources as invalid input rather than a raw KeyError
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return impl