4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have way too many lines in this module
import copy
import logging
import re
import OpenSSL

44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 import ganeti.masterd.instance # pylint: disable-msg=W0611
64 def _SupportsOob(cfg, node):
65 """Tells if node supports OOB.
67 @type cfg: L{config.ConfigWriter}
68 @param cfg: The cluster configuration
69 @type node: L{objects.Node}
71 @return: The OOB script if supported or an empty string otherwise
74 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
78 """Data container for LU results with jobs.
80 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82 contained in the C{jobs} attribute and include the job IDs in the opcode result.
86 def __init__(self, jobs, **kwargs):
87 """Initializes this class.
89 Additional return values can be specified as keyword arguments.
91 @type jobs: list of lists of L{opcodes.OpCode}
92 @param jobs: A list of lists of opcode objects
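    A hypothetical LU could hand follow-up jobs back to the processor like
    this (sketch only; the opcode used here is purely illustrative)::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=10)]],
                            other_result="some value")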
99 class LogicalUnit(object):
100 """Logical Unit base class.
102 Subclasses must follow these rules:
103 - implement ExpandNames
104 - implement CheckPrereq (except when tasklets are used)
105 - implement Exec (except when tasklets are used)
106 - implement BuildHooksEnv
107 - implement BuildHooksNodes
108 - redefine HPATH and HTYPE
109 - optionally redefine their run requirements:
110 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112 Note that all commands require root permissions.
114 @ivar dry_run_result: the value (if any) that will be returned to the caller
115 in dry-run mode (signalled by opcode dry_run parameter)
122 def __init__(self, processor, op, context, rpc):
123 """Constructor for LogicalUnit.
125 This needs to be overridden in derived classes in order to check op validity.
129 self.proc = processor
131 self.cfg = context.cfg
132 self.context = context
134 # Dicts used to declare locking needs to mcpu
135 self.needed_locks = None
136 self.acquired_locks = {}
137 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
139 self.remove_locks = {}
140 # Used to force good behavior when calling helper functions
141 self.recalculate_locks = {}
143 self.Log = processor.Log # pylint: disable-msg=C0103
144 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
145 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
146 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
147 # support for dry-run
148 self.dry_run_result = None
149 # support for generic debug attribute
150 if (not hasattr(self.op, "debug_level") or
151 not isinstance(self.op.debug_level, int)):
152 self.op.debug_level = 0
157 # Validate opcode parameters and set defaults
158 self.op.Validate(True)
160 self.CheckArguments()
162 def CheckArguments(self):
163 """Check syntactic validity for the opcode arguments.
165 This method is for doing a simple syntactic check and ensuring the
166 validity of opcode parameters, without any cluster-related
167 checks. While the same can be accomplished in ExpandNames and/or
168 CheckPrereq, doing these separately is better because:
170 - ExpandNames is left as purely a lock-related function
171 - CheckPrereq is run after we have acquired locks (and possible
174 The function is allowed to change the self.op attribute so that
175 later methods need not worry about missing parameters.
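    A hypothetical implementation could look like (sketch; the C{duration}
    opcode slot is illustrative only)::

      def CheckArguments(self):
        if self.op.duration < 0:
          raise errors.OpPrereqError("Duration must not be negative",
                                     errors.ECODE_INVAL)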
180 def ExpandNames(self):
181 """Expand names for this LU.
183 This method is called before starting to execute the opcode, and it should
184 update all the parameters of the opcode to their canonical form (e.g. a
185 short node name must be fully expanded after this method has successfully
186 completed). This way locking, hooks, logging, etc. can work correctly.
188 LUs which implement this method must also populate the self.needed_locks
189 member, as a dict with lock levels as keys, and a list of needed lock names
192 - use an empty dict if you don't need any lock
193 - if you don't need any lock at a particular level omit that level
194 - don't put anything for the BGL level
195 - if you want all locks at a level use locking.ALL_SET as a value
197 If you need to share locks (rather than acquire them exclusively) at one
198 level you can modify self.share_locks, setting a true value (usually 1) for
199 that level. By default locks are not shared.
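    For example, to acquire the node locks in shared mode::

      self.share_locks[locking.LEVEL_NODE] = 1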
201 This function can also define a list of tasklets, which then will be
202 executed in order instead of the usual LU-level CheckPrereq and Exec
203 functions, if those are not defined by the LU.
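    A purely tasklet-based LU could, for instance, declare (the tasklet class
    name below is hypothetical)::

      self.tasklets = [SomeTasklet(self)]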
207 # Acquire all nodes and one instance
208 self.needed_locks = {
209 locking.LEVEL_NODE: locking.ALL_SET,
210 locking.LEVEL_INSTANCE: ['instance1.example.com'],
212 # Acquire just two nodes
213 self.needed_locks = {
214 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
217 self.needed_locks = {} # No, you can't leave it to the default value None
220 # The implementation of this method is mandatory only if the new LU is
221 # concurrent, so that old LUs don't need to be changed all at the same
224 self.needed_locks = {} # Exclusive LUs don't need locks.
226 raise NotImplementedError
228 def DeclareLocks(self, level):
229 """Declare LU locking needs for a level
231 While most LUs can just declare their locking needs at ExpandNames time,
232 sometimes there's the need to calculate some locks after having acquired
233 the ones before. This function is called just before acquiring locks at a
234 particular level, but after acquiring the ones at lower levels, and permits
235 such calculations. It can be used to modify self.needed_locks, and by
236 default it does nothing.
238 This function is only called if you have something already set in
239 self.needed_locks for the level.
241 @param level: Locking level which is going to be locked
242 @type level: member of ganeti.locking.LEVELS
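    A typical implementation, recalculating the node locks from the instance
    locks acquired earlier, might look like (sketch)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()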
246 def CheckPrereq(self):
247 """Check prerequisites for this LU.
249 This method should check that the prerequisites for the execution
250 of this LU are fulfilled. It can do internode communication, but
251 it should be idempotent - no cluster or system changes are allowed.
254 The method should raise errors.OpPrereqError in case something is
255 not fulfilled. Its return value is ignored.
257 This method should also update all the parameters of the opcode to
258 their canonical form if it hasn't been done by ExpandNames before.
261 if self.tasklets is not None:
262 for (idx, tl) in enumerate(self.tasklets):
263 logging.debug("Checking prerequisites for tasklet %s/%s",
264 idx + 1, len(self.tasklets))
269 def Exec(self, feedback_fn):
272 This method should implement the actual work. It should raise
273 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
277 if self.tasklets is not None:
278 for (idx, tl) in enumerate(self.tasklets):
279 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
282 raise NotImplementedError
284 def BuildHooksEnv(self):
285 """Build hooks environment for this LU.
288 @return: Dictionary containing the environment that will be used for
289 running the hooks for this LU. The keys of the dict must not be prefixed
290 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
291 will extend the environment with additional variables. If no environment
292 should be defined, an empty dictionary should be returned (not C{None}).
293 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297 raise NotImplementedError
299 def BuildHooksNodes(self):
300 """Build list of nodes to run LU's hooks.
302 @rtype: tuple; (list, list)
303 @return: Tuple containing a list of node names on which the hook
304 should run before the execution and a list of node names on which the
305 hook should run after the execution. If there are no such nodes, an empty
306 list should be returned (and not None).
307 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311 raise NotImplementedError
313 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
314 """Notify the LU about the results of its hooks.
316 This method is called every time a hooks phase is executed, and notifies
317 the Logical Unit about the hooks' result. The LU can then use it to alter
318 its result based on the hooks. By default the method does nothing and the
319 previous result is passed back unchanged but any LU can define it if it
320 wants to use the local cluster hook-scripts somehow.
322 @param phase: one of L{constants.HOOKS_PHASE_POST} or
323 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
324 @param hook_results: the results of the multi-node hooks rpc call
325 @param feedback_fn: function used to send feedback back to the caller
326 @param lu_result: the previous Exec result this LU had, or None
328 @return: the new Exec result, based on the previous result
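    A minimal override reporting failed post-hooks could look like (sketch;
    the exact structure of C{hook_results} depends on the RPC layer)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          for node_name in hook_results:
            msg = hook_results[node_name].fail_msg
            if msg:
              feedback_fn("Hook on %s failed: %s" % (node_name, msg))
        return lu_result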
332 # API must be kept, thus we ignore the unused argument and the 'could
333 # be a function' warnings
334 # pylint: disable-msg=W0613,R0201
337 def _ExpandAndLockInstance(self):
338 """Helper function to expand and lock an instance.
340 Many LUs that work on an instance take its name in self.op.instance_name
341 and need to expand it and then declare the expanded name for locking. This
342 function does it, and then updates self.op.instance_name to the expanded
343 name. It also initializes needed_locks as a dict, if this hasn't been done before.
347 if self.needed_locks is None:
348 self.needed_locks = {}
350 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
351 "_ExpandAndLockInstance called with instance-level locks set"
352 self.op.instance_name = _ExpandInstanceName(self.cfg,
353 self.op.instance_name)
354 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
356 def _LockInstancesNodes(self, primary_only=False):
357 """Helper function to declare instances' nodes for locking.
359 This function should be called after locking one or more instances to lock
360 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
361 with all primary or secondary nodes for instances already locked and
362 present in self.needed_locks[locking.LEVEL_INSTANCE].
364 It should be called from DeclareLocks, and for safety only works if
365 self.recalculate_locks[locking.LEVEL_NODE] is set.
367 In the future it may grow parameters to just lock some instance's nodes, or
368 to lock only primary or secondary nodes, if needed.
370 It should be called in DeclareLocks in a way similar to::
372 if level == locking.LEVEL_NODE:
373 self._LockInstancesNodes()
375 @type primary_only: boolean
376 @param primary_only: only lock primary nodes of locked instances
379 assert locking.LEVEL_NODE in self.recalculate_locks, \
380 "_LockInstancesNodes helper function called with no nodes to recalculate"
382 # TODO: check if we've really been called with the instance locks held
384 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
385 # future we might want to have different behaviors depending on the value
386 # of self.recalculate_locks[locking.LEVEL_NODE]
388 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
389 instance = self.context.cfg.GetInstanceInfo(instance_name)
390 wanted_nodes.append(instance.primary_node)
392 wanted_nodes.extend(instance.secondary_nodes)
394 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
395 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
396 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
397 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
399 del self.recalculate_locks[locking.LEVEL_NODE]
402 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
403 """Simple LU which runs no hooks.
405 This LU is intended as a parent for other LogicalUnits which will
406 run no hooks, in order to reduce duplicate code.
412 def BuildHooksEnv(self):
413 """Empty BuildHooksEnv for NoHooksLu.
415 This just raises an error.
418 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
420 def BuildHooksNodes(self):
421 """Empty BuildHooksNodes for NoHooksLU.
424 raise AssertionError("BuildHooksNodes called for NoHooksLU")
428 """Tasklet base class.
430 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431 they can mix legacy code with tasklets. Locking needs to be done in the LU,
432 tasklets know nothing about locks.
434 Subclasses must follow these rules:
435 - Implement CheckPrereq
439 def __init__(self, lu):
446 def CheckPrereq(self):
447 """Check prerequisites for this tasklets.
449 This method should check whether the prerequisites for the execution of
450 this tasklet are fulfilled. It can do internode communication, but it
451 should be idempotent - no cluster or system changes are allowed.
453 The method should raise errors.OpPrereqError in case something is not
454 fulfilled. Its return value is ignored.
456 This method should also update all parameters to their canonical form if it
457 hasn't been done before.
462 def Exec(self, feedback_fn):
463 """Execute the tasklet.
465 This method should implement the actual work. It should raise
466 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
470 raise NotImplementedError
474 """Base for query utility classes.
477 #: Attribute holding field definitions
480 def __init__(self, filter_, fields, use_locking):
481 """Initializes this class.
484 self.use_locking = use_locking
486 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
488 self.requested_data = self.query.RequestedData()
489 self.names = self.query.RequestedNames()
491 # Sort only if no names were requested
492 self.sort_by_name = not self.names
494 self.do_locking = None
497 def _GetNames(self, lu, all_names, lock_level):
498 """Helper function to determine names asked for in the query.
502 names = lu.acquired_locks[lock_level]
506 if self.wanted == locking.ALL_SET:
507 assert not self.names
508 # caller didn't specify names, so ordering is not important
509 return utils.NiceSort(names)
511 # caller specified names and we must keep the same order
513 assert not self.do_locking or lu.acquired_locks[lock_level]
515 missing = set(self.wanted).difference(names)
517 raise errors.OpExecError("Some items were removed before retrieving"
518 " their data: %s" % missing)
520 # Return expanded names
523 def ExpandNames(self, lu):
524 """Expand names for this query.
526 See L{LogicalUnit.ExpandNames}.
529 raise NotImplementedError()
531 def DeclareLocks(self, lu, level):
532 """Declare locks for this query.
534 See L{LogicalUnit.DeclareLocks}.
537 raise NotImplementedError()
539 def _GetQueryData(self, lu):
540 """Collects all data for this query.
542 @return: Query data object
545 raise NotImplementedError()
547 def NewStyleQuery(self, lu):
548 """Collect data and execute query.
551 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552 sort_by_name=self.sort_by_name)
554 def OldStyleQuery(self, lu):
555 """Collect data and execute query.
558 return self.query.OldStyleQuery(self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
562 def _GetWantedNodes(lu, nodes):
563 """Returns list of checked and expanded node names.
565 @type lu: L{LogicalUnit}
566 @param lu: the logical unit on whose behalf we execute
568 @param nodes: list of node names or None for all nodes
570 @return: the list of nodes, sorted
571 @raise errors.ProgrammerError: if the nodes parameter is wrong type
575 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
577 return utils.NiceSort(lu.cfg.GetNodeList())
580 def _GetWantedInstances(lu, instances):
581 """Returns list of checked and expanded instance names.
583 @type lu: L{LogicalUnit}
584 @param lu: the logical unit on whose behalf we execute
585 @type instances: list
586 @param instances: list of instance names or None for all instances
588 @return: the list of instances, sorted
589 @raise errors.OpPrereqError: if the instances parameter is wrong type
590 @raise errors.OpPrereqError: if any of the passed instances is not found
594 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
596 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
600 def _GetUpdatedParams(old_params, update_dict,
601 use_default=True, use_none=False):
602 """Return the new version of a parameter dictionary.
604 @type old_params: dict
605 @param old_params: old parameters
606 @type update_dict: dict
607 @param update_dict: dict containing new parameter values, or
608 constants.VALUE_DEFAULT to reset the parameter to its default
610 @type use_default: boolean
611 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
612 values as 'to be deleted' values
613 @type use_none: boolean
614 @param use_none: whether to recognise C{None} values as 'to be deleted' values
617 @return: the new parameter dictionary
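  Example (illustrative values only)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # => {"a": 1, "c": 3}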
620 params_copy = copy.deepcopy(old_params)
621 for key, val in update_dict.iteritems():
622 if ((use_default and val == constants.VALUE_DEFAULT) or
623 (use_none and val is None)):
629 params_copy[key] = val
633 def _ReleaseLocks(lu, level, names=None, keep=None):
634 """Releases locks owned by an LU.
636 @type lu: L{LogicalUnit}
    @param lu: the logical unit owning the locks to be released
637 @param level: Lock level
638 @type names: list or None
639 @param names: Names of locks to release
640 @type keep: list or None
641 @param keep: Names of locks to retain
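  Example (sketch): keep only the lock on the instance's primary node and
  release all other node locks::

    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])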
644 assert not (keep is not None and names is not None), \
645 "Only one of the 'names' and the 'keep' parameters can be given"
647 if names is not None:
648 should_release = names.__contains__
650 should_release = lambda name: name not in keep
652 should_release = None
658 # Determine which locks to release
659 for name in lu.acquired_locks[level]:
660 if should_release(name):
665 assert len(lu.acquired_locks[level]) == (len(retain) + len(release))
667 # Release just some locks
668 lu.context.glm.release(level, names=release)
669 lu.acquired_locks[level] = retain
671 assert frozenset(lu.context.glm.list_owned(level)) == frozenset(retain)
674 lu.context.glm.release(level)
675 del lu.acquired_locks[level]
677 assert not lu.context.glm.list_owned(level), "No locks should be owned"
680 def _RunPostHook(lu, node_name):
681 """Runs the post-hook for an opcode on a single node.
684 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
686 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
688 # pylint: disable-msg=W0702
689 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
692 def _CheckOutputFields(static, dynamic, selected):
693 """Checks whether all selected fields are valid.
695 @type static: L{utils.FieldSet}
696 @param static: static fields set
697 @type dynamic: L{utils.FieldSet}
698 @param dynamic: dynamic fields set
705 delta = f.NonMatching(selected)
707 raise errors.OpPrereqError("Unknown output fields selected: %s"
708 % ",".join(delta), errors.ECODE_INVAL)
711 def _CheckGlobalHvParams(params):
712 """Validates that given hypervisor params are not global ones.
714 This will ensure that instances don't get customised versions of global parameters.
718 used_globals = constants.HVC_GLOBALS.intersection(params)
720 msg = ("The following hypervisor parameters are global and cannot"
721 " be customized at instance level, please modify them at"
722 " cluster level: %s" % utils.CommaJoin(used_globals))
723 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
726 def _CheckNodeOnline(lu, node, msg=None):
727 """Ensure that a given node is online.
729 @param lu: the LU on behalf of which we make the check
730 @param node: the node to check
731 @param msg: if passed, should be a message to replace the default one
732 @raise errors.OpPrereqError: if the node is offline
736 msg = "Can't use offline node"
737 if lu.cfg.GetNodeInfo(node).offline:
738 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
741 def _CheckNodeNotDrained(lu, node):
742 """Ensure that a given node is not drained.
744 @param lu: the LU on behalf of which we make the check
745 @param node: the node to check
746 @raise errors.OpPrereqError: if the node is drained
749 if lu.cfg.GetNodeInfo(node).drained:
750 raise errors.OpPrereqError("Can't use drained node %s" % node,
754 def _CheckNodeVmCapable(lu, node):
755 """Ensure that a given node is vm capable.
757 @param lu: the LU on behalf of which we make the check
758 @param node: the node to check
759 @raise errors.OpPrereqError: if the node is not vm capable
762 if not lu.cfg.GetNodeInfo(node).vm_capable:
763 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
767 def _CheckNodeHasOS(lu, node, os_name, force_variant):
768 """Ensure that a node supports a given OS.
770 @param lu: the LU on behalf of which we make the check
771 @param node: the node to check
772 @param os_name: the OS to query about
773 @param force_variant: whether to ignore variant errors
774 @raise errors.OpPrereqError: if the node is not supporting the OS
777 result = lu.rpc.call_os_get(node, os_name)
778 result.Raise("OS '%s' not in supported OS list for node %s" %
780 prereq=True, ecode=errors.ECODE_INVAL)
781 if not force_variant:
782 _CheckOSVariant(result.payload, os_name)
785 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
786 """Ensure that a node has the given secondary ip.
788 @type lu: L{LogicalUnit}
789 @param lu: the LU on behalf of which we make the check
791 @param node: the node to check
792 @type secondary_ip: string
793 @param secondary_ip: the ip to check
794 @type prereq: boolean
795 @param prereq: whether to throw a prerequisite or an execute error
796 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
797 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
800 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
801 result.Raise("Failure checking secondary ip on node %s" % node,
802 prereq=prereq, ecode=errors.ECODE_ENVIRON)
803 if not result.payload:
804 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
805 " please fix and re-run this command" % secondary_ip)
807 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
809 raise errors.OpExecError(msg)
812 def _GetClusterDomainSecret():
813 """Reads the cluster domain secret.
816 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
820 def _CheckInstanceDown(lu, instance, reason):
821 """Ensure that an instance is not running."""
822 if instance.admin_up:
823 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
824 (instance.name, reason), errors.ECODE_STATE)
826 pnode = instance.primary_node
827 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
828 ins_l.Raise("Can't contact node %s for instance information" % pnode,
829 prereq=True, ecode=errors.ECODE_ENVIRON)
831 if instance.name in ins_l.payload:
832 raise errors.OpPrereqError("Instance %s is running, %s" %
833 (instance.name, reason), errors.ECODE_STATE)
836 def _ExpandItemName(fn, name, kind):
837 """Expand an item name.
839 @param fn: the function to use for expansion
840 @param name: requested item name
841 @param kind: text description ('Node' or 'Instance')
842 @return: the resolved (full) name
843 @raise errors.OpPrereqError: if the item is not found
847 if full_name is None:
848 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
853 def _ExpandNodeName(cfg, name):
854 """Wrapper over L{_ExpandItemName} for nodes."""
855 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
858 def _ExpandInstanceName(cfg, name):
859 """Wrapper over L{_ExpandItemName} for instance."""
860 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
863 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
864 memory, vcpus, nics, disk_template, disks,
865 bep, hvp, hypervisor_name):
866 """Builds instance related env variables for hooks
868 This builds the hook environment from individual variables.
871 @param name: the name of the instance
872 @type primary_node: string
873 @param primary_node: the name of the instance's primary node
874 @type secondary_nodes: list
875 @param secondary_nodes: list of secondary nodes as strings
876 @type os_type: string
877 @param os_type: the name of the instance's OS
878 @type status: boolean
879 @param status: the should_run status of the instance
881 @param memory: the memory size of the instance
883 @param vcpus: the count of VCPUs the instance has
885 @param nics: list of tuples (ip, mac, mode, link) representing
886 the NICs the instance has
887 @type disk_template: string
888 @param disk_template: the disk template of the instance
890 @param disks: the list of (size, mode) pairs
892 @param bep: the backend parameters for the instance
894 @param hvp: the hypervisor parameters for the instance
895 @type hypervisor_name: string
896 @param hypervisor_name: the hypervisor for the instance
898 @return: the hook environment for this instance
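  Example call (sketch; all values are illustrative only)::

    env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
                                "debootstrap", True, 128, 1,
                                [("198.51.100.10", "aa:00:00:11:22:33",
                                  constants.NIC_MODE_BRIDGED, "xen-br0")],
                                constants.DT_PLAIN, [(10240, "rw")], {}, {},
                                constants.HT_XEN_PVM)
    # env["INSTANCE_NIC0_BRIDGE"] == "xen-br0"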
907 "INSTANCE_NAME": name,
908 "INSTANCE_PRIMARY": primary_node,
909 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
910 "INSTANCE_OS_TYPE": os_type,
911 "INSTANCE_STATUS": str_status,
912 "INSTANCE_MEMORY": memory,
913 "INSTANCE_VCPUS": vcpus,
914 "INSTANCE_DISK_TEMPLATE": disk_template,
915 "INSTANCE_HYPERVISOR": hypervisor_name,
919 nic_count = len(nics)
920 for idx, (ip, mac, mode, link) in enumerate(nics):
923 env["INSTANCE_NIC%d_IP" % idx] = ip
924 env["INSTANCE_NIC%d_MAC" % idx] = mac
925 env["INSTANCE_NIC%d_MODE" % idx] = mode
926 env["INSTANCE_NIC%d_LINK" % idx] = link
927 if mode == constants.NIC_MODE_BRIDGED:
928 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
932 env["INSTANCE_NIC_COUNT"] = nic_count
935 disk_count = len(disks)
936 for idx, (size, mode) in enumerate(disks):
937 env["INSTANCE_DISK%d_SIZE" % idx] = size
938 env["INSTANCE_DISK%d_MODE" % idx] = mode
942 env["INSTANCE_DISK_COUNT"] = disk_count
944 for source, kind in [(bep, "BE"), (hvp, "HV")]:
945 for key, value in source.items():
946 env["INSTANCE_%s_%s" % (kind, key)] = value
951 def _NICListToTuple(lu, nics):
952 """Build a list of nic information tuples.
954 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
955 value in LUInstanceQueryData.
957 @type lu: L{LogicalUnit}
958 @param lu: the logical unit on whose behalf we execute
959 @type nics: list of L{objects.NIC}
960 @param nics: list of nics to convert to hooks tuples
964 cluster = lu.cfg.GetClusterInfo()
968 filled_params = cluster.SimpleFillNIC(nic.nicparams)
969 mode = filled_params[constants.NIC_MODE]
970 link = filled_params[constants.NIC_LINK]
971 hooks_nics.append((ip, mac, mode, link))
975 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
976 """Builds instance related env variables for hooks from an object.
978 @type lu: L{LogicalUnit}
979 @param lu: the logical unit on whose behalf we execute
980 @type instance: L{objects.Instance}
981 @param instance: the instance for which we should build the environment
984 @param override: dictionary with key/values that will override our values
987 @return: the hook environment dictionary
990 cluster = lu.cfg.GetClusterInfo()
991 bep = cluster.FillBE(instance)
992 hvp = cluster.FillHV(instance)
994 'name': instance.name,
995 'primary_node': instance.primary_node,
996 'secondary_nodes': instance.secondary_nodes,
997 'os_type': instance.os,
998 'status': instance.admin_up,
999 'memory': bep[constants.BE_MEMORY],
1000 'vcpus': bep[constants.BE_VCPUS],
1001 'nics': _NICListToTuple(lu, instance.nics),
1002 'disk_template': instance.disk_template,
1003 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1006 'hypervisor_name': instance.hypervisor,
1009 args.update(override)
1010 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1013 def _AdjustCandidatePool(lu, exceptions):
1014 """Adjust the candidate pool after node operations.
1017 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1019 lu.LogInfo("Promoted nodes to master candidate role: %s",
1020 utils.CommaJoin(node.name for node in mod_list))
1021 for name in mod_list:
1022 lu.context.ReaddNode(name)
1023 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1025 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1029 def _DecideSelfPromotion(lu, exceptions=None):
1030 """Decide whether I should promote myself as a master candidate.
1033 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1034 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1035 # the new node will increase mc_max by one, so:
1036 mc_should = min(mc_should + 1, cp_size)
1037 return mc_now < mc_should
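# Worked example for _DecideSelfPromotion (illustrative numbers): with
# candidate_pool_size=10 and 4 current master candidates while 4 should
# exist, adding this node raises mc_should to min(4 + 1, 10) = 5, so
# 4 < 5 holds and the node promotes itself.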
1040 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1041 """Check that the brigdes needed by a list of nics exist.
1044 cluster = lu.cfg.GetClusterInfo()
1045 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1046 brlist = [params[constants.NIC_LINK] for params in paramslist
1047 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1049 result = lu.rpc.call_bridges_exist(target_node, brlist)
1050 result.Raise("Error checking bridges on destination node '%s'" %
1051 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1054 def _CheckInstanceBridgesExist(lu, instance, node=None):
1055 """Check that the brigdes needed by an instance exist.
1059 node = instance.primary_node
1060 _CheckNicsBridgesExist(lu, instance.nics, node)
1063 def _CheckOSVariant(os_obj, name):
1064 """Check whether an OS name conforms to the os variants specification.
1066 @type os_obj: L{objects.OS}
1067 @param os_obj: OS object to check
1069 @param name: OS name passed by the user, to check for validity
1072 if not os_obj.supported_variants:
1074 variant = objects.OS.GetVariant(name)
1076 raise errors.OpPrereqError("OS name must include a variant",
1079 if variant not in os_obj.supported_variants:
1080 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
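# Example for _CheckOSVariant (illustrative): for an OS that declares
# supported variants,
#   _CheckOSVariant(os_obj, "debootstrap+default")  # passes if the variant
#                                                   # is supported
#   _CheckOSVariant(os_obj, "debootstrap")          # raises OpPrereqError,
#                                                   # no variant given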
1083 def _GetNodeInstancesInner(cfg, fn):
1084 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1087 def _GetNodeInstances(cfg, node_name):
1088 """Returns a list of all primary and secondary instances on a node.
1092 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1095 def _GetNodePrimaryInstances(cfg, node_name):
1096 """Returns primary instances on a node.
1099 return _GetNodeInstancesInner(cfg,
1100 lambda inst: node_name == inst.primary_node)
1103 def _GetNodeSecondaryInstances(cfg, node_name):
1104 """Returns secondary instances on a node.
1107 return _GetNodeInstancesInner(cfg,
1108 lambda inst: node_name in inst.secondary_nodes)
1111 def _GetStorageTypeArgs(cfg, storage_type):
1112 """Returns the arguments for a storage type.
1115 # Special case for file storage
1116 if storage_type == constants.ST_FILE:
1117 # storage.FileStorage wants a list of storage directories
1118 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1123 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1126 for dev in instance.disks:
1127 cfg.SetDiskID(dev, node_name)
1129 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1130 result.Raise("Failed to get disk status from node %s" % node_name,
1131 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1133 for idx, bdev_status in enumerate(result.payload):
1134 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1140 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1141 """Check the sanity of iallocator and node arguments and use the
1142 cluster-wide iallocator if appropriate.
1144 Check that at most one of (iallocator, node) is specified. If none is
1145 specified, then the LU's opcode's iallocator slot is filled with the
1146 cluster-wide default iallocator.
1148 @type iallocator_slot: string
1149 @param iallocator_slot: the name of the opcode iallocator slot
1150 @type node_slot: string
1151 @param node_slot: the name of the opcode target node slot
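  A typical use from an LU's CheckArguments would be (sketch; the slot names
  depend on the opcode)::

    _CheckIAllocatorOrNode(self, "iallocator", "node")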
1154 node = getattr(lu.op, node_slot, None)
1155 iallocator = getattr(lu.op, iallocator_slot, None)
1157 if node is not None and iallocator is not None:
1158 raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1160 elif node is None and iallocator is None:
1161 default_iallocator = lu.cfg.GetDefaultIAllocator()
1162 if default_iallocator:
1163 setattr(lu.op, iallocator_slot, default_iallocator)
1165 raise errors.OpPrereqError("No iallocator or node given and no"
1166 " cluster-wide default iallocator found."
1167 " Please specify either an iallocator or a"
1168 " node, or set a cluster-wide default"
1172 class LUClusterPostInit(LogicalUnit):
1173 """Logical unit for running hooks after cluster initialization.
1176 HPATH = "cluster-init"
1177 HTYPE = constants.HTYPE_CLUSTER
1179 def BuildHooksEnv(self):
1184 "OP_TARGET": self.cfg.GetClusterName(),
1187 def BuildHooksNodes(self):
1188 """Build hooks nodes.
1191 return ([], [self.cfg.GetMasterNode()])
1193 def Exec(self, feedback_fn):
1200 class LUClusterDestroy(LogicalUnit):
1201 """Logical unit for destroying the cluster.
1204 HPATH = "cluster-destroy"
1205 HTYPE = constants.HTYPE_CLUSTER
1207 def BuildHooksEnv(self):
1212 "OP_TARGET": self.cfg.GetClusterName(),
1215 def BuildHooksNodes(self):
1216 """Build hooks nodes.
1221 def CheckPrereq(self):
1222 """Check prerequisites.
1224 This checks whether the cluster is empty.
1226 Any errors are signaled by raising errors.OpPrereqError.
1229 master = self.cfg.GetMasterNode()
1231 nodelist = self.cfg.GetNodeList()
1232 if len(nodelist) != 1 or nodelist[0] != master:
1233 raise errors.OpPrereqError("There are still %d node(s) in"
1234 " this cluster." % (len(nodelist) - 1),
1236 instancelist = self.cfg.GetInstanceList()
1238 raise errors.OpPrereqError("There are still %d instance(s) in"
1239 " this cluster." % len(instancelist),
1242 def Exec(self, feedback_fn):
1243 """Destroys the cluster.
1246 master = self.cfg.GetMasterNode()
1248 # Run post hooks on master node before it's removed
1249 _RunPostHook(self, master)
1251 result = self.rpc.call_node_stop_master(master, False)
1252 result.Raise("Could not disable the master role")
1257 def _VerifyCertificate(filename):
1258 """Verifies a certificate for LUClusterVerify.
1260 @type filename: string
1261 @param filename: Path to PEM file
1265 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1266 utils.ReadFile(filename))
1267 except Exception, err: # pylint: disable-msg=W0703
1268 return (LUClusterVerify.ETYPE_ERROR,
1269 "Failed to load X509 certificate %s: %s" % (filename, err))
1272 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1273 constants.SSL_CERT_EXPIRATION_ERROR)
1276 fnamemsg = "While verifying %s: %s" % (filename, msg)
1281 return (None, fnamemsg)
1282 elif errcode == utils.CERT_WARNING:
1283 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1284 elif errcode == utils.CERT_ERROR:
1285 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1287 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1290 class LUClusterVerify(LogicalUnit):
1291 """Verifies the cluster status.
1294 HPATH = "cluster-verify"
1295 HTYPE = constants.HTYPE_CLUSTER
1298 TCLUSTER = "cluster"
1300 TINSTANCE = "instance"
1302 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1303 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1304 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1305 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1306 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1307 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1308 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1309 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1310 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1311 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1312 ENODEDRBD = (TNODE, "ENODEDRBD")
1313 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1314 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1315 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1316 ENODEHV = (TNODE, "ENODEHV")
1317 ENODELVM = (TNODE, "ENODELVM")
1318 ENODEN1 = (TNODE, "ENODEN1")
1319 ENODENET = (TNODE, "ENODENET")
1320 ENODEOS = (TNODE, "ENODEOS")
1321 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1322 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1323 ENODERPC = (TNODE, "ENODERPC")
1324 ENODESSH = (TNODE, "ENODESSH")
1325 ENODEVERSION = (TNODE, "ENODEVERSION")
1326 ENODESETUP = (TNODE, "ENODESETUP")
1327 ENODETIME = (TNODE, "ENODETIME")
1328 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1330 ETYPE_FIELD = "code"
1331 ETYPE_ERROR = "ERROR"
1332 ETYPE_WARNING = "WARNING"
1334 _HOOKS_INDENT_RE = re.compile("^", re.M)
1336 class NodeImage(object):
1337 """A class representing the logical and physical status of a node.
1340 @ivar name: the node name to which this object refers
1341 @ivar volumes: a structure as returned from
1342 L{ganeti.backend.GetVolumeList} (runtime)
1343 @ivar instances: a list of running instances (runtime)
1344 @ivar pinst: list of configured primary instances (config)
1345 @ivar sinst: list of configured secondary instances (config)
1346 @ivar sbp: dictionary of {primary-node: list of instances} for all
1347 instances for which this node is secondary (config)
1348 @ivar mfree: free memory, as reported by hypervisor (runtime)
1349 @ivar dfree: free disk, as reported by the node (runtime)
1350 @ivar offline: the offline status (config)
1351 @type rpc_fail: boolean
1352 @ivar rpc_fail: whether the RPC verify call failed (overall,
1353 not whether the individual keys were correct) (runtime)
1354 @type lvm_fail: boolean
1355 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1356 @type hyp_fail: boolean
1357 @ivar hyp_fail: whether the RPC call didn't return the instance list
1358 @type ghost: boolean
1359 @ivar ghost: whether this is a known node or not (config)
1360 @type os_fail: boolean
1361 @ivar os_fail: whether the RPC call didn't return valid OS data
1363 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1364 @type vm_capable: boolean
1365 @ivar vm_capable: whether the node can host instances
1368 def __init__(self, offline=False, name=None, vm_capable=True):
1377 self.offline = offline
1378 self.vm_capable = vm_capable
1379 self.rpc_fail = False
1380 self.lvm_fail = False
1381 self.hyp_fail = False
1383 self.os_fail = False
1386 def ExpandNames(self):
1387 self.needed_locks = {
1388 locking.LEVEL_NODE: locking.ALL_SET,
1389 locking.LEVEL_INSTANCE: locking.ALL_SET,
1391 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1393 def _Error(self, ecode, item, msg, *args, **kwargs):
1394 """Format an error message.
1396 Based on the opcode's error_codes parameter, either format a
1397 parseable error code, or a simpler error string.
1399 This must be called only from Exec and functions called from Exec.
1402 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1404 # first complete the msg
1407 # then format the whole message
1408 if self.op.error_codes:
1409 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1415 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1416 # and finally report it via the feedback_fn
1417 self._feedback_fn(" - %s" % msg)
1419 def _ErrorIf(self, cond, *args, **kwargs):
1420 """Log an error message if the passed condition is True.
1423 cond = bool(cond) or self.op.debug_simulate_errors
1425 self._Error(*args, **kwargs)
1426 # do not mark the operation as failed when only warnings were raised
1427 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1428 self.bad = self.bad or cond
1430 def _VerifyNode(self, ninfo, nresult):
1431 """Perform some basic validation on data returned from a node.
1433 - check the result data structure is well formed and has all the
1435 - check ganeti version
1437 @type ninfo: L{objects.Node}
1438 @param ninfo: the node to check
1439 @param nresult: the results from the node
1441 @return: whether overall this call was successful (and we can expect
1442 reasonable values in the response)
1446 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1448 # main result, nresult should be a non-empty dict
1449 test = not nresult or not isinstance(nresult, dict)
1450 _ErrorIf(test, self.ENODERPC, node,
1451 "unable to verify node: no data returned")
1455 # compares ganeti version
1456 local_version = constants.PROTOCOL_VERSION
1457 remote_version = nresult.get("version", None)
1458 test = not (remote_version and
1459 isinstance(remote_version, (list, tuple)) and
1460 len(remote_version) == 2)
1461 _ErrorIf(test, self.ENODERPC, node,
1462 "connection to node returned invalid data")
1466 test = local_version != remote_version[0]
1467 _ErrorIf(test, self.ENODEVERSION, node,
1468 "incompatible protocol versions: master %s,"
1469 " node %s", local_version, remote_version[0])
1473 # node seems compatible, we can actually try to look into its results
1475 # full package version
1476 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1477 self.ENODEVERSION, node,
1478 "software version mismatch: master %s, node %s",
1479 constants.RELEASE_VERSION, remote_version[1],
1480 code=self.ETYPE_WARNING)
1482 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1483 if ninfo.vm_capable and isinstance(hyp_result, dict):
1484 for hv_name, hv_result in hyp_result.iteritems():
1485 test = hv_result is not None
1486 _ErrorIf(test, self.ENODEHV, node,
1487 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1489 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1490 if ninfo.vm_capable and isinstance(hvp_result, list):
1491 for item, hv_name, hv_result in hvp_result:
1492 _ErrorIf(True, self.ENODEHV, node,
1493 "hypervisor %s parameter verify failure (source %s): %s",
1494 hv_name, item, hv_result)
1496 test = nresult.get(constants.NV_NODESETUP,
1497 ["Missing NODESETUP results"])
1498 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1503 def _VerifyNodeTime(self, ninfo, nresult,
1504 nvinfo_starttime, nvinfo_endtime):
1505 """Check the node time.
1507 @type ninfo: L{objects.Node}
1508 @param ninfo: the node to check
1509 @param nresult: the remote results for the node
1510 @param nvinfo_starttime: the start time of the RPC call
1511 @param nvinfo_endtime: the end time of the RPC call
1515 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1517 ntime = nresult.get(constants.NV_TIME, None)
1519 ntime_merged = utils.MergeTime(ntime)
1520 except (ValueError, TypeError):
1521 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1524 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1525 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1526 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1527 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1531 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1532 "Node time diverges by at least %s from master node time",
1535 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1536 """Check the node time.
1538 @type ninfo: L{objects.Node}
1539 @param ninfo: the node to check
1540 @param nresult: the remote results for the node
1541 @param vg_name: the configured VG name
1548 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1550 # checks vg existence and size > 20G
1551 vglist = nresult.get(constants.NV_VGLIST, None)
1553 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1555 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1556 constants.MIN_VG_SIZE)
1557 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1560 pvlist = nresult.get(constants.NV_PVLIST, None)
1561 test = pvlist is None
1562 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1564 # check that ':' is not present in PV names, since it's a
1565 # special character for lvcreate (denotes the range of PEs to
1567 for _, pvname, owner_vg in pvlist:
1568 test = ":" in pvname
1569 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1570 " '%s' of VG '%s'", pvname, owner_vg)
1572 def _VerifyNodeNetwork(self, ninfo, nresult):
1573 """Check the node time.
1575 @type ninfo: L{objects.Node}
1576 @param ninfo: the node to check
1577 @param nresult: the remote results for the node
1581 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1583 test = constants.NV_NODELIST not in nresult
1584 _ErrorIf(test, self.ENODESSH, node,
1585 "node hasn't returned node ssh connectivity data")
1587 if nresult[constants.NV_NODELIST]:
1588 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1589 _ErrorIf(True, self.ENODESSH, node,
1590 "ssh communication with node '%s': %s", a_node, a_msg)
1592 test = constants.NV_NODENETTEST not in nresult
1593 _ErrorIf(test, self.ENODENET, node,
1594 "node hasn't returned node tcp connectivity data")
1596 if nresult[constants.NV_NODENETTEST]:
1597 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1599 _ErrorIf(True, self.ENODENET, node,
1600 "tcp communication with node '%s': %s",
1601 anode, nresult[constants.NV_NODENETTEST][anode])
1603 test = constants.NV_MASTERIP not in nresult
1604 _ErrorIf(test, self.ENODENET, node,
1605 "node hasn't returned node master IP reachability data")
1607 if not nresult[constants.NV_MASTERIP]:
1608 if node == self.master_node:
1609 msg = "the master node cannot reach the master IP (not configured?)"
1611 msg = "cannot reach the master IP"
1612 _ErrorIf(True, self.ENODENET, node, msg)
1614 def _VerifyInstance(self, instance, instanceconfig, node_image,
1616 """Verify an instance.
1618 This function checks to see if the required block devices are
1619 available on the instance's node.
1622 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1623 node_current = instanceconfig.primary_node
1625 node_vol_should = {}
1626 instanceconfig.MapLVsByNode(node_vol_should)
1628 for node in node_vol_should:
1629 n_img = node_image[node]
1630 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1631 # ignore missing volumes on offline or broken nodes
1633 for volume in node_vol_should[node]:
1634 test = volume not in n_img.volumes
1635 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1636 "volume %s missing on node %s", volume, node)
1638 if instanceconfig.admin_up:
1639 pri_img = node_image[node_current]
1640 test = instance not in pri_img.instances and not pri_img.offline
1641 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1642 "instance not running on its primary node %s",
1645 for node, n_img in node_image.items():
1646 if node != node_current:
1647 test = instance in n_img.instances
1648 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1649 "instance should not run on node %s", node)
1651 diskdata = [(nname, success, status, idx)
1652 for (nname, disks) in diskstatus.items()
1653 for idx, (success, status) in enumerate(disks)]
1655 for nname, success, bdev_status, idx in diskdata:
1656 # the 'ghost node' construction in Exec() ensures that we have a
1658 snode = node_image[nname]
1659 bad_snode = snode.ghost or snode.offline
1660 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1661 self.EINSTANCEFAULTYDISK, instance,
1662 "couldn't retrieve status for disk/%s on %s: %s",
1663 idx, nname, bdev_status)
1664 _ErrorIf((instanceconfig.admin_up and success and
1665 bdev_status.ldisk_status == constants.LDS_FAULTY),
1666 self.EINSTANCEFAULTYDISK, instance,
1667 "disk/%s on %s is faulty", idx, nname)
1669 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1670 """Verify if there are any unknown volumes in the cluster.
1672 The .os, .swap and backup volumes are ignored. All other volumes are
1673 reported as unknown.
1675 @type reserved: L{ganeti.utils.FieldSet}
1676 @param reserved: a FieldSet of reserved volume names
1679 for node, n_img in node_image.items():
1680 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1681 # skip non-healthy nodes
1683 for volume in n_img.volumes:
1684 test = ((node not in node_vol_should or
1685 volume not in node_vol_should[node]) and
1686 not reserved.Matches(volume))
1687 self._ErrorIf(test, self.ENODEORPHANLV, node,
1688 "volume %s is unknown", volume)
1690 def _VerifyOrphanInstances(self, instancelist, node_image):
1691 """Verify the list of running instances.
1693 This checks what instances are running but unknown to the cluster.
1696 for node, n_img in node_image.items():
1697 for o_inst in n_img.instances:
1698 test = o_inst not in instancelist
1699 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1700 "instance %s on node %s should not exist", o_inst, node)
1702 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1703 """Verify N+1 Memory Resilience.
1705 Check that if one single node dies we can still start all the
1706 instances it was primary for.
1709 cluster_info = self.cfg.GetClusterInfo()
1710 for node, n_img in node_image.items():
1711 # This code checks that every node which is now listed as
1712 # secondary has enough memory to host all instances it is
1713 # supposed to should a single other node in the cluster fail.
1714 # FIXME: not ready for failover to an arbitrary node
1715 # FIXME: does not support file-backed instances
1716 # WARNING: we currently take into account down instances as well
1717 # as up ones, considering that even if they're down someone
1718 # might want to start them even in the event of a node failure.
1720 # we're skipping offline nodes from the N+1 warning, since
1721 # most likely we don't have good memory information from them;
1722 # we already list instances living on such nodes, and that's
1725 for prinode, instances in n_img.sbp.items():
1727 for instance in instances:
1728 bep = cluster_info.FillBE(instance_cfg[instance])
1729 if bep[constants.BE_AUTO_BALANCE]:
1730 needed_mem += bep[constants.BE_MEMORY]
1731 test = n_img.mfree < needed_mem
1732 self._ErrorIf(test, self.ENODEN1, node,
1733 "not enough memory to accomodate instance failovers"
1734 " should node %s fail (%dMiB needed, %dMiB available)",
1735 prinode, needed_mem, n_img.mfree)
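    # Worked example (illustrative numbers): if this node is secondary for
    # two auto-balanced instances whose primary is node A, each with
    # BE_MEMORY=1024, then needed_mem for prinode A is 2048 MiB; with only
    # mfree=1536 MiB the ENODEN1 error above is reported.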
1738 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1739 (files_all, files_all_opt, files_mc, files_vm)):
1740 """Verifies file checksums collected from all nodes.
1742 @param errorif: Callback for reporting errors
1743 @param nodeinfo: List of L{objects.Node} objects
1744 @param master_node: Name of master node
1745 @param all_nvinfo: RPC results
1748 node_names = frozenset(node.name for node in nodeinfo)
1750 assert master_node in node_names
1751 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1752 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1753 "Found file listed in more than one file list"
1755 # Define functions determining which nodes to consider for a file
1756 file2nodefn = dict([(filename, fn)
1757 for (files, fn) in [(files_all, None),
1758 (files_all_opt, None),
1759 (files_mc, lambda node: (node.master_candidate or
1760 node.name == master_node)),
1761 (files_vm, lambda node: node.vm_capable)]
1762 for filename in files])
1764 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1766 for node in nodeinfo:
1767 nresult = all_nvinfo[node.name]
1769 if nresult.fail_msg or not nresult.payload:
1772 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1774 test = not (node_files and isinstance(node_files, dict))
1775 errorif(test, cls.ENODEFILECHECK, node.name,
1776 "Node did not return file checksum data")
1780 for (filename, checksum) in node_files.items():
1781 # Check if the file should be considered for a node
1782 fn = file2nodefn[filename]
1783 if fn is None or fn(node):
1784 fileinfo[filename].setdefault(checksum, set()).add(node.name)
1786 for (filename, checksums) in fileinfo.items():
1787 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1789 # Nodes having the file
1790 with_file = frozenset(node_name
1791 for nodes in fileinfo[filename].values()
1792 for node_name in nodes)
1794 # Nodes missing file
1795 missing_file = node_names - with_file
1797 if filename in files_all_opt:
1799 errorif(missing_file and missing_file != node_names,
1800 cls.ECLUSTERFILECHECK, None,
1801 "File %s is optional, but it must exist on all or no nodes (not"
1803 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1805 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1806 "File %s is missing from node(s) %s", filename,
1807 utils.CommaJoin(utils.NiceSort(missing_file)))
1809 # See if there are multiple versions of the file
1810 test = len(checksums) > 1
1812 variants = ["variant %s on %s" %
1813 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1814 for (idx, (checksum, nodes)) in
1815 enumerate(sorted(checksums.items()))]
1819 errorif(test, cls.ECLUSTERFILECHECK, None,
1820 "File %s found with %s different checksums (%s)",
1821 filename, len(checksums), "; ".join(variants))
1823 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1825 """Verifies and the node DRBD status.
1827 @type ninfo: L{objects.Node}
1828 @param ninfo: the node to check
1829 @param nresult: the remote results for the node
1830 @param instanceinfo: the dict of instances
1831 @param drbd_helper: the configured DRBD usermode helper
1832 @param drbd_map: the DRBD map as returned by
1833 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1837 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1840 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1841 test = (helper_result is None)
1842 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1843 "no drbd usermode helper returned")
1845 status, payload = helper_result
1847 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1848 "drbd usermode helper check unsuccessful: %s", payload)
1849 test = status and (payload != drbd_helper)
1850 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1851 "wrong drbd usermode helper: %s", payload)
1853 # compute the DRBD minors
1855 for minor, instance in drbd_map[node].items():
1856 test = instance not in instanceinfo
1857 _ErrorIf(test, self.ECLUSTERCFG, None,
1858 "ghost instance '%s' in temporary DRBD map", instance)
1859 # ghost instance should not be running, but otherwise we
1860 # don't give double warnings (both ghost instance and
1861 # unallocated minor in use)
1863 node_drbd[minor] = (instance, False)
1865 instance = instanceinfo[instance]
1866 node_drbd[minor] = (instance.name, instance.admin_up)
1868 # and now check them
1869 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1870 test = not isinstance(used_minors, (tuple, list))
1871 _ErrorIf(test, self.ENODEDRBD, node,
1872 "cannot parse drbd status file: %s", str(used_minors))
1874 # we cannot check drbd status
1877 for minor, (iname, must_exist) in node_drbd.items():
1878 test = minor not in used_minors and must_exist
1879 _ErrorIf(test, self.ENODEDRBD, node,
1880 "drbd minor %d of instance %s is not active", minor, iname)
1881 for minor in used_minors:
1882 test = minor not in node_drbd
1883 _ErrorIf(test, self.ENODEDRBD, node,
1884 "unallocated drbd minor %d is in use", minor)
1886 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1887 """Builds the node OS structures.
1889 @type ninfo: L{objects.Node}
1890 @param ninfo: the node to check
1891 @param nresult: the remote results for the node
1892 @param nimg: the node image object
1896 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1898 remote_os = nresult.get(constants.NV_OSLIST, None)
1899 test = (not isinstance(remote_os, list) or
1900 not compat.all(isinstance(v, list) and len(v) == 7
1901 for v in remote_os))
1903 _ErrorIf(test, self.ENODEOS, node,
1904 "node hasn't returned valid OS data")
1913 for (name, os_path, status, diagnose,
1914 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1916 if name not in os_dict:
1919 # parameters is a list of lists instead of list of tuples due to
1920 # JSON lacking a real tuple type, fix it:
1921 parameters = [tuple(v) for v in parameters]
1922 os_dict[name].append((os_path, status, diagnose,
1923 set(variants), set(parameters), set(api_ver)))
1925 nimg.oslist = os_dict
1927 def _VerifyNodeOS(self, ninfo, nimg, base):
1928 """Verifies the node OS list.
1930 @type ninfo: L{objects.Node}
1931 @param ninfo: the node to check
1932 @param nimg: the node image object
1933 @param base: the 'template' node we match against (e.g. from the master)
1937 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1939 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1941 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1942 for os_name, os_data in nimg.oslist.items():
1943 assert os_data, "Empty OS status for OS %s?!" % os_name
1944 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1945 _ErrorIf(not f_status, self.ENODEOS, node,
1946 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1947 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1948 "OS '%s' has multiple entries (first one shadows the rest): %s",
1949 os_name, utils.CommaJoin([v[0] for v in os_data]))
1950 # this will be caught in the backend too
1951 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1952 and not f_var, self.ENODEOS, node,
1953 "OS %s with API at least %d does not declare any variant",
1954 os_name, constants.OS_API_V15)
1955 # comparisons with the 'base' image
1956 test = os_name not in base.oslist
1957 _ErrorIf(test, self.ENODEOS, node,
1958 "Extra OS %s not present on reference node (%s)",
1962 assert base.oslist[os_name], "Base node has empty OS status?"
1963 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1965 # base OS is invalid, skipping
1967 for kind, a, b in [("API version", f_api, b_api),
1968 ("variants list", f_var, b_var),
1969 ("parameters", beautify_params(f_param),
1970 beautify_params(b_param))]:
1971 _ErrorIf(a != b, self.ENODEOS, node,
1972 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1973 kind, os_name, base.name,
1974 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1976 # check any missing OSes
1977 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1978 _ErrorIf(missing, self.ENODEOS, node,
1979 "OSes present on reference node %s but missing on this node: %s",
1980 base.name, utils.CommaJoin(missing))
1982 def _VerifyOob(self, ninfo, nresult):
1983 """Verifies out of band functionality of a node.
1985 @type ninfo: L{objects.Node}
1986 @param ninfo: the node to check
1987 @param nresult: the remote results for the node
1991 # We just have to verify the paths on master and/or master candidates
1992 # as the oob helper is invoked on the master
1993 if ((ninfo.master_candidate or ninfo.master_capable) and
1994 constants.NV_OOB_PATHS in nresult):
1995 for path_result in nresult[constants.NV_OOB_PATHS]:
1996 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1998 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1999 """Verifies and updates the node volume data.
2001 This function will update a L{NodeImage}'s internal structures
2002 with data from the remote call.
2004 @type ninfo: L{objects.Node}
2005 @param ninfo: the node to check
2006 @param nresult: the remote results for the node
2007 @param nimg: the node image object
2008 @param vg_name: the configured VG name
2012 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2014 nimg.lvm_fail = True
2015 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2018 elif isinstance(lvdata, basestring):
2019 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2020 utils.SafeEncode(lvdata))
2021 elif not isinstance(lvdata, dict):
2022 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2024 nimg.volumes = lvdata
2025 nimg.lvm_fail = False
2027 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2028 """Verifies and updates the node instance list.
2030 If the listing was successful, then updates this node's instance
2031 list. Otherwise, it marks the RPC call as failed for the instance list.
2034 @type ninfo: L{objects.Node}
2035 @param ninfo: the node to check
2036 @param nresult: the remote results for the node
2037 @param nimg: the node image object
2040 idata = nresult.get(constants.NV_INSTANCELIST, None)
2041 test = not isinstance(idata, list)
2042 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2043 " (instancelist): %s", utils.SafeEncode(str(idata)))
2045 nimg.hyp_fail = True
2047 nimg.instances = idata
2049 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2050 """Verifies and computes a node information map
2052 @type ninfo: L{objects.Node}
2053 @param ninfo: the node to check
2054 @param nresult: the remote results for the node
2055 @param nimg: the node image object
2056 @param vg_name: the configured VG name
2060 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2062 # try to read free memory (from the hypervisor)
2063 hv_info = nresult.get(constants.NV_HVINFO, None)
2064 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2065 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2068 nimg.mfree = int(hv_info["memory_free"])
2069 except (ValueError, TypeError):
2070 _ErrorIf(True, self.ENODERPC, node,
2071 "node returned invalid nodeinfo, check hypervisor")
2073 # FIXME: devise a free space model for file based instances as well
2074 if vg_name is not None:
2075 test = (constants.NV_VGLIST not in nresult or
2076 vg_name not in nresult[constants.NV_VGLIST])
2077 _ErrorIf(test, self.ENODELVM, node,
2078 "node didn't return data for the volume group '%s'"
2079 " - it is either missing or broken", vg_name)
2082 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2083 except (ValueError, TypeError):
2084 _ErrorIf(True, self.ENODERPC, node,
2085 "node returned invalid LVM info, check LVM status")
2087 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2088 """Gets per-disk status information for all instances.
2090 @type nodelist: list of strings
2091 @param nodelist: Node names
2092 @type node_image: dict of (name, L{objects.Node})
2093 @param node_image: Node objects
2094 @type instanceinfo: dict of (name, L{objects.Instance})
2095 @param instanceinfo: Instance objects
2096 @rtype: {instance: {node: [(success, payload)]}}
2097 @return: a dictionary of per-instance dictionaries with nodes as
2098 keys and disk information as values; the disk information is a
2099 list of tuples (success, payload)
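An illustrative sketch of the returned mapping (all names are placeholders),
eg::

  {"instance1.example.com":
     {"node1.example.com": [(True, <status of disk/0>),
                            (True, <status of disk/1>)]},
   "instance2.example.com":
     {"node2.example.com": [(False, "node offline")]}}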
2102 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2105 node_disks_devonly = {}
2106 diskless_instances = set()
2107 diskless = constants.DT_DISKLESS
2109 for nname in nodelist:
2110 node_instances = list(itertools.chain(node_image[nname].pinst,
2111 node_image[nname].sinst))
2112 diskless_instances.update(inst for inst in node_instances
2113 if instanceinfo[inst].disk_template == diskless)
2114 disks = [(inst, disk)
2115 for inst in node_instances
2116 for disk in instanceinfo[inst].disks]
2119 # No need to collect data
2122 node_disks[nname] = disks
2124 # Creating copies as SetDiskID below will modify the objects and that can
2125 # lead to incorrect data returned from nodes
2126 devonly = [dev.Copy() for (_, dev) in disks]
2129 self.cfg.SetDiskID(dev, nname)
2131 node_disks_devonly[nname] = devonly
2133 assert len(node_disks) == len(node_disks_devonly)
2135 # Collect data from all nodes with disks
2136 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2139 assert len(result) == len(node_disks)
2143 for (nname, nres) in result.items():
2144 disks = node_disks[nname]
2147 # No data from this node
2148 data = len(disks) * [(False, "node offline")]
2151 _ErrorIf(msg, self.ENODERPC, nname,
2152 "while getting disk information: %s", msg)
2154 # No data from this node
2155 data = len(disks) * [(False, msg)]
2158 for idx, i in enumerate(nres.payload):
2159 if isinstance(i, (tuple, list)) and len(i) == 2:
2162 logging.warning("Invalid result from node %s, entry %d: %s",
2164 data.append((False, "Invalid result from the remote node"))
2166 for ((inst, _), status) in zip(disks, data):
2167 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2169 # Add empty entries for diskless instances.
2170 for inst in diskless_instances:
2171 assert inst not in instdisk
2174 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2175 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2176 compat.all(isinstance(s, (tuple, list)) and
2177 len(s) == 2 for s in statuses)
2178 for inst, nnames in instdisk.items()
2179 for nname, statuses in nnames.items())
2180 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2184 def _VerifyHVP(self, hvp_data):
2185 """Verifies locally the syntax of the hypervisor parameters.
2188 for item, hv_name, hv_params in hvp_data:
2189 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2192 hv_class = hypervisor.GetHypervisor(hv_name)
2193 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2194 hv_class.CheckParameterSyntax(hv_params)
2195 except errors.GenericError, err:
2196 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2198 def BuildHooksEnv(self):
2201 Cluster-Verify hooks are only run in the post phase; if they fail, their
2202 output is logged in the verify output and the verification fails.
2208 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2211 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2212 for node in cfg.GetAllNodesInfo().values())
2216 def BuildHooksNodes(self):
2217 """Build hooks nodes.
2220 return ([], self.cfg.GetNodeList())
2222 def Exec(self, feedback_fn):
2223 """Verify integrity of cluster, performing various test on nodes.
2226 # This method has too many local variables. pylint: disable-msg=R0914
2228 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2229 verbose = self.op.verbose
2230 self._feedback_fn = feedback_fn
2231 feedback_fn("* Verifying global settings")
2232 for msg in self.cfg.VerifyConfig():
2233 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2235 # Check the cluster certificates
2236 for cert_filename in constants.ALL_CERT_FILES:
2237 (errcode, msg) = _VerifyCertificate(cert_filename)
2238 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2240 vg_name = self.cfg.GetVGName()
2241 drbd_helper = self.cfg.GetDRBDHelper()
2242 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2243 cluster = self.cfg.GetClusterInfo()
2244 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2245 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2246 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2247 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2248 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2249 for iname in instancelist)
2250 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2251 i_non_redundant = [] # Non redundant instances
2252 i_non_a_balanced = [] # Non auto-balanced instances
2253 n_offline = 0 # Count of offline nodes
2254 n_drained = 0 # Count of nodes being drained
2255 node_vol_should = {}
2257 # FIXME: verify OS list
2260 filemap = _ComputeAncillaryFiles(cluster, False)
2262 # do local checksums
2263 master_node = self.master_node = self.cfg.GetMasterNode()
2264 master_ip = self.cfg.GetMasterIP()
2266 # Compute the set of hypervisor parameters
2268 for hv_name in hypervisors:
2269 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2270 for os_name, os_hvp in cluster.os_hvp.items():
2271 for hv_name, hv_params in os_hvp.items():
2274 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2275 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2276 # TODO: collapse identical parameter values in a single one
2277 for instance in instanceinfo.values():
2278 if not instance.hvparams:
2280 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2281 cluster.FillHV(instance)))
2282 # and verify them locally
2283 self._VerifyHVP(hvp_data)
2285 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2286 node_verify_param = {
2287 constants.NV_FILELIST:
2288 utils.UniqueSequence(filename
2289 for files in filemap
2290 for filename in files),
2291 constants.NV_NODELIST: [node.name for node in nodeinfo
2292 if not node.offline],
2293 constants.NV_HYPERVISOR: hypervisors,
2294 constants.NV_HVPARAMS: hvp_data,
2295 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2296 node.secondary_ip) for node in nodeinfo
2297 if not node.offline],
2298 constants.NV_INSTANCELIST: hypervisors,
2299 constants.NV_VERSION: None,
2300 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2301 constants.NV_NODESETUP: None,
2302 constants.NV_TIME: None,
2303 constants.NV_MASTERIP: (master_node, master_ip),
2304 constants.NV_OSLIST: None,
2305 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2308 if vg_name is not None:
2309 node_verify_param[constants.NV_VGLIST] = None
2310 node_verify_param[constants.NV_LVLIST] = vg_name
2311 node_verify_param[constants.NV_PVLIST] = [vg_name]
2312 node_verify_param[constants.NV_DRBDLIST] = None
2315 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2317 # Build our expected cluster state
2318 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2320 vm_capable=node.vm_capable))
2321 for node in nodeinfo)
2325 for node in nodeinfo:
2326 path = _SupportsOob(self.cfg, node)
2327 if path and path not in oob_paths:
2328 oob_paths.append(path)
2331 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2333 for instance in instancelist:
2334 inst_config = instanceinfo[instance]
2336 for nname in inst_config.all_nodes:
2337 if nname not in node_image:
2339 gnode = self.NodeImage(name=nname)
2341 node_image[nname] = gnode
2343 inst_config.MapLVsByNode(node_vol_should)
2345 pnode = inst_config.primary_node
2346 node_image[pnode].pinst.append(instance)
2348 for snode in inst_config.secondary_nodes:
2349 nimg = node_image[snode]
2350 nimg.sinst.append(instance)
2351 if pnode not in nimg.sbp:
2352 nimg.sbp[pnode] = []
2353 nimg.sbp[pnode].append(instance)
2355 # At this point, we have the in-memory data structures complete,
2356 # except for the runtime information, which we'll gather next
2358 # Due to the way our RPC system works, exact response times cannot be
2359 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2360 # time before and after executing the request, we can at least have a time window.
2362 nvinfo_starttime = time.time()
2363 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2364 self.cfg.GetClusterName())
2365 nvinfo_endtime = time.time()
2367 all_drbd_map = self.cfg.ComputeDRBDMap()
2369 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2370 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2372 feedback_fn("* Verifying configuration file consistency")
2373 self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2375 feedback_fn("* Verifying node status")
2379 for node_i in nodeinfo:
2381 nimg = node_image[node]
2385 feedback_fn("* Skipping offline node %s" % (node,))
2389 if node == master_node:
2391 elif node_i.master_candidate:
2392 ntype = "master candidate"
2393 elif node_i.drained:
2399 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2401 msg = all_nvinfo[node].fail_msg
2402 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2404 nimg.rpc_fail = True
2407 nresult = all_nvinfo[node].payload
2409 nimg.call_ok = self._VerifyNode(node_i, nresult)
2410 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2411 self._VerifyNodeNetwork(node_i, nresult)
2412 self._VerifyOob(node_i, nresult)
2415 self._VerifyNodeLVM(node_i, nresult, vg_name)
2416 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2419 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2420 self._UpdateNodeInstances(node_i, nresult, nimg)
2421 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2422 self._UpdateNodeOS(node_i, nresult, nimg)
2423 if not nimg.os_fail:
2424 if refos_img is None:
2426 self._VerifyNodeOS(node_i, nimg, refos_img)
2428 feedback_fn("* Verifying instance status")
2429 for instance in instancelist:
2431 feedback_fn("* Verifying instance %s" % instance)
2432 inst_config = instanceinfo[instance]
2433 self._VerifyInstance(instance, inst_config, node_image,
2435 inst_nodes_offline = []
2437 pnode = inst_config.primary_node
2438 pnode_img = node_image[pnode]
2439 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2440 self.ENODERPC, pnode, "instance %s, connection to"
2441 " primary node failed", instance)
2443 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2444 self.EINSTANCEBADNODE, instance,
2445 "instance is marked as running and lives on offline node %s",
2446 inst_config.primary_node)
2448 # If the instance is non-redundant we cannot survive losing its primary
2449 # node, so we are not N+1 compliant. On the other hand we have no disk
2450 # templates with more than one secondary so that situation is not well supported either.
2452 # FIXME: does not support file-backed instances
2453 if not inst_config.secondary_nodes:
2454 i_non_redundant.append(instance)
2456 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2457 instance, "instance has multiple secondary nodes: %s",
2458 utils.CommaJoin(inst_config.secondary_nodes),
2459 code=self.ETYPE_WARNING)
2461 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2462 pnode = inst_config.primary_node
2463 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2464 instance_groups = {}
2466 for node in instance_nodes:
2467 instance_groups.setdefault(nodeinfo_byname[node].group,
2471 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2472 # Sort so that we always list the primary node first.
2473 for group, nodes in sorted(instance_groups.items(),
2474 key=lambda (_, nodes): pnode in nodes,
2477 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2478 instance, "instance has primary and secondary nodes in"
2479 " different groups: %s", utils.CommaJoin(pretty_list),
2480 code=self.ETYPE_WARNING)
2482 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2483 i_non_a_balanced.append(instance)
2485 for snode in inst_config.secondary_nodes:
2486 s_img = node_image[snode]
2487 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2488 "instance %s, connection to secondary node failed", instance)
2491 inst_nodes_offline.append(snode)
2493 # warn that the instance lives on offline nodes
2494 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2495 "instance has offline secondary node(s) %s",
2496 utils.CommaJoin(inst_nodes_offline))
2497 # ... or ghost/non-vm_capable nodes
2498 for node in inst_config.all_nodes:
2499 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2500 "instance lives on ghost node %s", node)
2501 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2502 instance, "instance lives on non-vm_capable node %s", node)
2504 feedback_fn("* Verifying orphan volumes")
2505 reserved = utils.FieldSet(*cluster.reserved_lvs)
2506 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2508 feedback_fn("* Verifying orphan instances")
2509 self._VerifyOrphanInstances(instancelist, node_image)
2511 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2512 feedback_fn("* Verifying N+1 Memory redundancy")
2513 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2515 feedback_fn("* Other Notes")
2517 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2518 % len(i_non_redundant))
2520 if i_non_a_balanced:
2521 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2522 % len(i_non_a_balanced))
2525 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2528 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2532 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2533 """Analyze the post-hooks' result
2535 This method analyses the hook result, handles it, and sends some
2536 nicely-formatted feedback back to the user.
2538 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2539 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2540 @param hooks_results: the results of the multi-node hooks rpc call
2541 @param feedback_fn: function used to send feedback back to the caller
2542 @param lu_result: previous Exec result
2543 @return: the new Exec result, based on the previous result
2547 # We only really run POST phase hooks, and are only interested in their results
2549 if phase == constants.HOOKS_PHASE_POST:
2550 # Used to change hooks' output to proper indentation
2551 feedback_fn("* Hooks Results")
2552 assert hooks_results, "invalid result from hooks"
2554 for node_name in hooks_results:
2555 res = hooks_results[node_name]
2557 test = msg and not res.offline
2558 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2559 "Communication failure in hooks execution: %s", msg)
2560 if res.offline or msg:
2561 # No need to investigate payload if node is offline or gave an error.
2562 # manually override lu_result here as _ErrorIf only
2563 # overrides self.bad
2566 for script, hkr, output in res.payload:
2567 test = hkr == constants.HKR_FAIL
2568 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2569 "Script %s failed, output:", script)
2571 output = self._HOOKS_INDENT_RE.sub(' ', output)
2572 feedback_fn("%s" % output)
2578 class LUClusterVerifyDisks(NoHooksLU):
2579 """Verifies the cluster disks status.
2584 def ExpandNames(self):
2585 self.needed_locks = {
2586 locking.LEVEL_NODE: locking.ALL_SET,
2587 locking.LEVEL_INSTANCE: locking.ALL_SET,
2589 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2591 def Exec(self, feedback_fn):
2592 """Verify integrity of cluster disks.
2594 @rtype: tuple of three items
2595 @return: a tuple of (dict of node-to-node_error, list of instances
2596 which need activate-disks, dict of instance: (node, volume) for missing volumes)
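An illustrative sketch of a possible return value (all names are
placeholders), eg::

  ({"node3.example.com": "Error while enumerating LVs"},
   ["instance1.example.com"],
   {"instance2.example.com": [("node1.example.com", "xenvg/disk0")]})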
2600 result = res_nodes, res_instances, res_missing = {}, [], {}
2602 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2603 instances = self.cfg.GetAllInstancesInfo().values()
2606 for inst in instances:
2608 if not inst.admin_up:
2610 inst.MapLVsByNode(inst_lvs)
2611 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2612 for node, vol_list in inst_lvs.iteritems():
2613 for vol in vol_list:
2614 nv_dict[(node, vol)] = inst
2619 node_lvs = self.rpc.call_lv_list(nodes, [])
2620 for node, node_res in node_lvs.items():
2621 if node_res.offline:
2623 msg = node_res.fail_msg
2625 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2626 res_nodes[node] = msg
2629 lvs = node_res.payload
2630 for lv_name, (_, _, lv_online) in lvs.items():
2631 inst = nv_dict.pop((node, lv_name), None)
2632 if (not lv_online and inst is not None
2633 and inst.name not in res_instances):
2634 res_instances.append(inst.name)
2636 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2638 for key, inst in nv_dict.iteritems():
2639 if inst.name not in res_missing:
2640 res_missing[inst.name] = []
2641 res_missing[inst.name].append(key)
2646 class LUClusterRepairDiskSizes(NoHooksLU):
2647 """Verifies the cluster disks sizes.
2652 def ExpandNames(self):
2653 if self.op.instances:
2654 self.wanted_names = []
2655 for name in self.op.instances:
2656 full_name = _ExpandInstanceName(self.cfg, name)
2657 self.wanted_names.append(full_name)
2658 self.needed_locks = {
2659 locking.LEVEL_NODE: [],
2660 locking.LEVEL_INSTANCE: self.wanted_names,
2662 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2664 self.wanted_names = None
2665 self.needed_locks = {
2666 locking.LEVEL_NODE: locking.ALL_SET,
2667 locking.LEVEL_INSTANCE: locking.ALL_SET,
2669 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2671 def DeclareLocks(self, level):
2672 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2673 self._LockInstancesNodes(primary_only=True)
2675 def CheckPrereq(self):
2676 """Check prerequisites.
2678 This only checks the optional instance list against the existing names.
2681 if self.wanted_names is None:
2682 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2684 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2685 in self.wanted_names]
2687 def _EnsureChildSizes(self, disk):
2688 """Ensure children of the disk have the needed disk size.
2690 This is valid mainly for DRBD8 and fixes an issue where the
2691 children have smaller disk size.
2693 @param disk: an L{ganeti.objects.Disk} object
2696 if disk.dev_type == constants.LD_DRBD8:
2697 assert disk.children, "Empty children for DRBD8?"
2698 fchild = disk.children[0]
2699 mismatch = fchild.size < disk.size
2701 self.LogInfo("Child disk has size %d, parent %d, fixing",
2702 fchild.size, disk.size)
2703 fchild.size = disk.size
2705 # and we recurse on this child only, not on the metadev
2706 return self._EnsureChildSizes(fchild) or mismatch
2710 def Exec(self, feedback_fn):
2711 """Verify the size of cluster disks.
2714 # TODO: check child disks too
2715 # TODO: check differences in size between primary/secondary nodes
2717 for instance in self.wanted_instances:
2718 pnode = instance.primary_node
2719 if pnode not in per_node_disks:
2720 per_node_disks[pnode] = []
2721 for idx, disk in enumerate(instance.disks):
2722 per_node_disks[pnode].append((instance, idx, disk))
2725 for node, dskl in per_node_disks.items():
2726 newl = [v[2].Copy() for v in dskl]
2728 self.cfg.SetDiskID(dsk, node)
2729 result = self.rpc.call_blockdev_getsize(node, newl)
2731 self.LogWarning("Failure in blockdev_getsize call to node"
2732 " %s, ignoring", node)
2734 if len(result.payload) != len(dskl):
2735 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2736 " result.payload=%s", node, len(dskl), result.payload)
2737 self.LogWarning("Invalid result from node %s, ignoring node results",
2740 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2742 self.LogWarning("Disk %d of instance %s did not return size"
2743 " information, ignoring", idx, instance.name)
2745 if not isinstance(size, (int, long)):
2746 self.LogWarning("Disk %d of instance %s did not return valid"
2747 " size information, ignoring", idx, instance.name)
2750 if size != disk.size:
2751 self.LogInfo("Disk %d of instance %s has mismatched size,"
2752 " correcting: recorded %d, actual %d", idx,
2753 instance.name, disk.size, size)
2755 self.cfg.Update(instance, feedback_fn)
2756 changed.append((instance.name, idx, size))
2757 if self._EnsureChildSizes(disk):
2758 self.cfg.Update(instance, feedback_fn)
2759 changed.append((instance.name, idx, disk.size))
2763 class LUClusterRename(LogicalUnit):
2764 """Rename the cluster.
2767 HPATH = "cluster-rename"
2768 HTYPE = constants.HTYPE_CLUSTER
2770 def BuildHooksEnv(self):
2775 "OP_TARGET": self.cfg.GetClusterName(),
2776 "NEW_NAME": self.op.name,
2779 def BuildHooksNodes(self):
2780 """Build hooks nodes.
2783 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2785 def CheckPrereq(self):
2786 """Verify that the passed name is a valid one.
2789 hostname = netutils.GetHostname(name=self.op.name,
2790 family=self.cfg.GetPrimaryIPFamily())
2792 new_name = hostname.name
2793 self.ip = new_ip = hostname.ip
2794 old_name = self.cfg.GetClusterName()
2795 old_ip = self.cfg.GetMasterIP()
2796 if new_name == old_name and new_ip == old_ip:
2797 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2798 " cluster has changed",
2800 if new_ip != old_ip:
2801 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2802 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2803 " reachable on the network" %
2804 new_ip, errors.ECODE_NOTUNIQUE)
2806 self.op.name = new_name
2808 def Exec(self, feedback_fn):
2809 """Rename the cluster.
2812 clustername = self.op.name
2815 # shutdown the master IP
2816 master = self.cfg.GetMasterNode()
2817 result = self.rpc.call_node_stop_master(master, False)
2818 result.Raise("Could not disable the master role")
2821 cluster = self.cfg.GetClusterInfo()
2822 cluster.cluster_name = clustername
2823 cluster.master_ip = ip
2824 self.cfg.Update(cluster, feedback_fn)
2826 # update the known hosts file
2827 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2828 node_list = self.cfg.GetOnlineNodeList()
2830 node_list.remove(master)
2833 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2835 result = self.rpc.call_node_start_master(master, False, False)
2836 msg = result.fail_msg
2838 self.LogWarning("Could not re-enable the master role on"
2839 " the master, please restart manually: %s", msg)
2844 class LUClusterSetParams(LogicalUnit):
2845 """Change the parameters of the cluster.
2848 HPATH = "cluster-modify"
2849 HTYPE = constants.HTYPE_CLUSTER
2852 def CheckArguments(self):
2856 if self.op.uid_pool:
2857 uidpool.CheckUidPool(self.op.uid_pool)
2859 if self.op.add_uids:
2860 uidpool.CheckUidPool(self.op.add_uids)
2862 if self.op.remove_uids:
2863 uidpool.CheckUidPool(self.op.remove_uids)
2865 def ExpandNames(self):
2866 # FIXME: in the future maybe other cluster params won't require checking on
2867 # all nodes to be modified.
2868 self.needed_locks = {
2869 locking.LEVEL_NODE: locking.ALL_SET,
2871 self.share_locks[locking.LEVEL_NODE] = 1
2873 def BuildHooksEnv(self):
2878 "OP_TARGET": self.cfg.GetClusterName(),
2879 "NEW_VG_NAME": self.op.vg_name,
2882 def BuildHooksNodes(self):
2883 """Build hooks nodes.
2886 mn = self.cfg.GetMasterNode()
2889 def CheckPrereq(self):
2890 """Check prerequisites.
2892 This checks that the given parameters don't conflict and
2893 that the given volume group is valid.
2896 if self.op.vg_name is not None and not self.op.vg_name:
2897 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2898 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2899 " instances exist", errors.ECODE_INVAL)
2901 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2902 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2903 raise errors.OpPrereqError("Cannot disable drbd helper while"
2904 " drbd-based instances exist",
2907 node_list = self.acquired_locks[locking.LEVEL_NODE]
2909 # if vg_name not None, checks given volume group on all nodes
2911 vglist = self.rpc.call_vg_list(node_list)
2912 for node in node_list:
2913 msg = vglist[node].fail_msg
2915 # ignoring down node
2916 self.LogWarning("Error while gathering data on node %s"
2917 " (ignoring node): %s", node, msg)
2919 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2921 constants.MIN_VG_SIZE)
2923 raise errors.OpPrereqError("Error on node '%s': %s" %
2924 (node, vgstatus), errors.ECODE_ENVIRON)
2926 if self.op.drbd_helper:
2927 # checks given drbd helper on all nodes
2928 helpers = self.rpc.call_drbd_helper(node_list)
2929 for node in node_list:
2930 ninfo = self.cfg.GetNodeInfo(node)
2932 self.LogInfo("Not checking drbd helper on offline node %s", node)
2934 msg = helpers[node].fail_msg
2936 raise errors.OpPrereqError("Error checking drbd helper on node"
2937 " '%s': %s" % (node, msg),
2938 errors.ECODE_ENVIRON)
2939 node_helper = helpers[node].payload
2940 if node_helper != self.op.drbd_helper:
2941 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2942 (node, node_helper), errors.ECODE_ENVIRON)
2944 self.cluster = cluster = self.cfg.GetClusterInfo()
2945 # validate params changes
2946 if self.op.beparams:
2947 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2948 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2950 if self.op.ndparams:
2951 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2952 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2954 # TODO: we need a more general way to handle resetting
2955 # cluster-level parameters to default values
2956 if self.new_ndparams["oob_program"] == "":
2957 self.new_ndparams["oob_program"] = \
2958 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2960 if self.op.nicparams:
2961 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2962 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2963 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2966 # check all instances for consistency
2967 for instance in self.cfg.GetAllInstancesInfo().values():
2968 for nic_idx, nic in enumerate(instance.nics):
2969 params_copy = copy.deepcopy(nic.nicparams)
2970 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2972 # check parameter syntax
2974 objects.NIC.CheckParameterSyntax(params_filled)
2975 except errors.ConfigurationError, err:
2976 nic_errors.append("Instance %s, nic/%d: %s" %
2977 (instance.name, nic_idx, err))
2979 # if we're moving instances to routed, check that they have an ip
2980 target_mode = params_filled[constants.NIC_MODE]
2981 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2982 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2983 (instance.name, nic_idx))
2985 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2986 "\n".join(nic_errors))
2988 # hypervisor list/parameters
2989 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2990 if self.op.hvparams:
2991 for hv_name, hv_dict in self.op.hvparams.items():
2992 if hv_name not in self.new_hvparams:
2993 self.new_hvparams[hv_name] = hv_dict
2995 self.new_hvparams[hv_name].update(hv_dict)
2997 # os hypervisor parameters
2998 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3000 for os_name, hvs in self.op.os_hvp.items():
3001 if os_name not in self.new_os_hvp:
3002 self.new_os_hvp[os_name] = hvs
3004 for hv_name, hv_dict in hvs.items():
3005 if hv_name not in self.new_os_hvp[os_name]:
3006 self.new_os_hvp[os_name][hv_name] = hv_dict
3008 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3011 self.new_osp = objects.FillDict(cluster.osparams, {})
3012 if self.op.osparams:
3013 for os_name, osp in self.op.osparams.items():
3014 if os_name not in self.new_osp:
3015 self.new_osp[os_name] = {}
3017 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3020 if not self.new_osp[os_name]:
3021 # we removed all parameters
3022 del self.new_osp[os_name]
3024 # check the parameter validity (remote check)
3025 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3026 os_name, self.new_osp[os_name])
3028 # changes to the hypervisor list
3029 if self.op.enabled_hypervisors is not None:
3030 self.hv_list = self.op.enabled_hypervisors
3031 for hv in self.hv_list:
3032 # if the hypervisor doesn't already exist in the cluster
3033 # hvparams, we initialize it to empty, and then (in both
3034 # cases) we make sure to fill the defaults, as we might not
3035 # have a complete defaults list if the hypervisor wasn't enabled before
3037 if hv not in new_hvp:
3039 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3040 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3042 self.hv_list = cluster.enabled_hypervisors
3044 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3045 # either the enabled list has changed, or the parameters have, validate
3046 for hv_name, hv_params in self.new_hvparams.items():
3047 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3048 (self.op.enabled_hypervisors and
3049 hv_name in self.op.enabled_hypervisors)):
3050 # either this is a new hypervisor, or its parameters have changed
3051 hv_class = hypervisor.GetHypervisor(hv_name)
3052 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3053 hv_class.CheckParameterSyntax(hv_params)
3054 _CheckHVParams(self, node_list, hv_name, hv_params)
3057 # no need to check any newly-enabled hypervisors, since the
3058 # defaults have already been checked in the above code-block
3059 for os_name, os_hvp in self.new_os_hvp.items():
3060 for hv_name, hv_params in os_hvp.items():
3061 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3062 # we need to fill in the new os_hvp on top of the actual hv_p
3063 cluster_defaults = self.new_hvparams.get(hv_name, {})
3064 new_osp = objects.FillDict(cluster_defaults, hv_params)
3065 hv_class = hypervisor.GetHypervisor(hv_name)
3066 hv_class.CheckParameterSyntax(new_osp)
3067 _CheckHVParams(self, node_list, hv_name, new_osp)
3069 if self.op.default_iallocator:
3070 alloc_script = utils.FindFile(self.op.default_iallocator,
3071 constants.IALLOCATOR_SEARCH_PATH,
3073 if alloc_script is None:
3074 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3075 " specified" % self.op.default_iallocator,
3078 def Exec(self, feedback_fn):
3079 """Change the parameters of the cluster.
3082 if self.op.vg_name is not None:
3083 new_volume = self.op.vg_name
3086 if new_volume != self.cfg.GetVGName():
3087 self.cfg.SetVGName(new_volume)
3089 feedback_fn("Cluster LVM configuration already in desired"
3090 " state, not changing")
3091 if self.op.drbd_helper is not None:
3092 new_helper = self.op.drbd_helper
3095 if new_helper != self.cfg.GetDRBDHelper():
3096 self.cfg.SetDRBDHelper(new_helper)
3098 feedback_fn("Cluster DRBD helper already in desired state,"
3100 if self.op.hvparams:
3101 self.cluster.hvparams = self.new_hvparams
3103 self.cluster.os_hvp = self.new_os_hvp
3104 if self.op.enabled_hypervisors is not None:
3105 self.cluster.hvparams = self.new_hvparams
3106 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3107 if self.op.beparams:
3108 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3109 if self.op.nicparams:
3110 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3111 if self.op.osparams:
3112 self.cluster.osparams = self.new_osp
3113 if self.op.ndparams:
3114 self.cluster.ndparams = self.new_ndparams
3116 if self.op.candidate_pool_size is not None:
3117 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3118 # we need to update the pool size here, otherwise the save will fail
3119 _AdjustCandidatePool(self, [])
3121 if self.op.maintain_node_health is not None:
3122 self.cluster.maintain_node_health = self.op.maintain_node_health
3124 if self.op.prealloc_wipe_disks is not None:
3125 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3127 if self.op.add_uids is not None:
3128 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3130 if self.op.remove_uids is not None:
3131 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3133 if self.op.uid_pool is not None:
3134 self.cluster.uid_pool = self.op.uid_pool
3136 if self.op.default_iallocator is not None:
3137 self.cluster.default_iallocator = self.op.default_iallocator
3139 if self.op.reserved_lvs is not None:
3140 self.cluster.reserved_lvs = self.op.reserved_lvs
3142 def helper_os(aname, mods, desc):
3144 lst = getattr(self.cluster, aname)
3145 for key, val in mods:
3146 if key == constants.DDM_ADD:
3148 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3151 elif key == constants.DDM_REMOVE:
3155 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3157 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3159 if self.op.hidden_os:
3160 helper_os("hidden_os", self.op.hidden_os, "hidden")
3162 if self.op.blacklisted_os:
3163 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3165 if self.op.master_netdev:
3166 master = self.cfg.GetMasterNode()
3167 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3168 self.cluster.master_netdev)
3169 result = self.rpc.call_node_stop_master(master, False)
3170 result.Raise("Could not disable the master ip")
3171 feedback_fn("Changing master_netdev from %s to %s" %
3172 (self.cluster.master_netdev, self.op.master_netdev))
3173 self.cluster.master_netdev = self.op.master_netdev
3175 self.cfg.Update(self.cluster, feedback_fn)
3177 if self.op.master_netdev:
3178 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3179 self.op.master_netdev)
3180 result = self.rpc.call_node_start_master(master, False, False)
3182 self.LogWarning("Could not re-enable the master ip on"
3183 " the master, please restart manually: %s",
3187 def _UploadHelper(lu, nodes, fname):
3188 """Helper for uploading a file and showing warnings.
3191 if os.path.exists(fname):
3192 result = lu.rpc.call_upload_file(nodes, fname)
3193 for to_node, to_result in result.items():
3194 msg = to_result.fail_msg
3196 msg = ("Copy of file %s to node %s failed: %s" %
3197 (fname, to_node, msg))
3198 lu.proc.LogWarning(msg)
3201 def _ComputeAncillaryFiles(cluster, redist):
3202 """Compute files external to Ganeti which need to be consistent.
3204 @type redist: boolean
3205 @param redist: Whether to include files which need to be redistributed
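The return value is a tuple (files_all, files_all_opt, files_mc, files_vm)
of file-name sets; a minimal illustrative sketch (membership shown is
indicative only, the real sets are larger), eg::

  (set([constants.SSH_KNOWN_HOSTS_FILE, constants.CONFD_HMAC_KEY]),
   set([constants.RAPI_USERS_FILE]),
   set([constants.CLUSTER_CONF_FILE]),
   set(["/etc/xen/xend-config.sxp"]))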
3208 # Compute files for all nodes
3210 constants.SSH_KNOWN_HOSTS_FILE,
3211 constants.CONFD_HMAC_KEY,
3212 constants.CLUSTER_DOMAIN_SECRET_FILE,
3216 files_all.update(constants.ALL_CERT_FILES)
3217 files_all.update(ssconf.SimpleStore().GetFileList())
3219 if cluster.modify_etc_hosts:
3220 files_all.add(constants.ETC_HOSTS)
3222 # Files which must either exist on all nodes or on none
3223 files_all_opt = set([
3224 constants.RAPI_USERS_FILE,
3227 # Files which should only be on master candidates
3230 files_mc.add(constants.CLUSTER_CONF_FILE)
3232 # Files which should only be on VM-capable nodes
3233 files_vm = set(filename
3234 for hv_name in cluster.enabled_hypervisors
3235 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3237 # Filenames must be unique
3238 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3239 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3240 "Found file listed in more than one file list"
3242 return (files_all, files_all_opt, files_mc, files_vm)
3245 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3246 """Distribute additional files which are part of the cluster configuration.
3248 ConfigWriter takes care of distributing the config and ssconf files, but
3249 there are more files which should be distributed to all nodes. This function
3250 makes sure those are copied.
3252 @param lu: calling logical unit
3253 @param additional_nodes: list of nodes not in the config to distribute to
3254 @type additional_vm: boolean
3255 @param additional_vm: whether the additional nodes are vm-capable or not
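A minimal usage sketch (assuming lu is a logical unit that already holds the
node locks it needs; the node name is a placeholder), eg::

  # push ancillary files to all configured nodes
  _RedistributeAncillaryFiles(lu)
  # ... or also to a node that is being added and is vm-capable
  _RedistributeAncillaryFiles(lu, additional_nodes=["node9.example.com"],
                              additional_vm=True)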
3258 # Gather target nodes
3259 cluster = lu.cfg.GetClusterInfo()
3260 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3262 online_nodes = lu.cfg.GetOnlineNodeList()
3263 vm_nodes = lu.cfg.GetVmCapableNodeList()
3265 if additional_nodes is not None:
3266 online_nodes.extend(additional_nodes)
3268 vm_nodes.extend(additional_nodes)
3270 # Never distribute to master node
3271 for nodelist in [online_nodes, vm_nodes]:
3272 if master_info.name in nodelist:
3273 nodelist.remove(master_info.name)
3276 (files_all, files_all_opt, files_mc, files_vm) = \
3277 _ComputeAncillaryFiles(cluster, True)
3279 # Never re-distribute configuration file from here
3280 assert not (constants.CLUSTER_CONF_FILE in files_all or
3281 constants.CLUSTER_CONF_FILE in files_vm)
3282 assert not files_mc, "Master candidates not handled in this function"
3285 (online_nodes, files_all),
3286 (online_nodes, files_all_opt),
3287 (vm_nodes, files_vm),
3291 for (node_list, files) in filemap:
3293 _UploadHelper(lu, node_list, fname)
3296 class LUClusterRedistConf(NoHooksLU):
3297 """Force the redistribution of cluster configuration.
3299 This is a very simple LU.
3304 def ExpandNames(self):
3305 self.needed_locks = {
3306 locking.LEVEL_NODE: locking.ALL_SET,
3308 self.share_locks[locking.LEVEL_NODE] = 1
3310 def Exec(self, feedback_fn):
3311 """Redistribute the configuration.
3314 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3315 _RedistributeAncillaryFiles(self)
3318 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3319 """Sleep and poll for an instance's disk to sync.
3322 if not instance.disks or disks is not None and not disks:
3325 disks = _ExpandCheckDisks(instance, disks)
3328 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3330 node = instance.primary_node
3333 lu.cfg.SetDiskID(dev, node)
3335 # TODO: Convert to utils.Retry
3338 degr_retries = 10 # in seconds, as we sleep 1 second each time
3342 cumul_degraded = False
3343 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3344 msg = rstats.fail_msg
3346 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3349 raise errors.RemoteError("Can't contact node %s for mirror data,"
3350 " aborting." % node)
3353 rstats = rstats.payload
3355 for i, mstat in enumerate(rstats):
3357 lu.LogWarning("Can't compute data for node %s/%s",
3358 node, disks[i].iv_name)
3361 cumul_degraded = (cumul_degraded or
3362 (mstat.is_degraded and mstat.sync_percent is None))
3363 if mstat.sync_percent is not None:
3365 if mstat.estimated_time is not None:
3366 rem_time = ("%s remaining (estimated)" %
3367 utils.FormatSeconds(mstat.estimated_time))
3368 max_time = mstat.estimated_time
3370 rem_time = "no time estimate"
3371 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3372 (disks[i].iv_name, mstat.sync_percent, rem_time))
3374 # if we're done but degraded, let's do a few small retries, to
3375 # make sure we see a stable and not transient situation; therefore
3376 # we force restart of the loop
3377 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3378 logging.info("Degraded disks found, %d retries left", degr_retries)
3386 time.sleep(min(60, max_time))
3389 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3390 return not cumul_degraded
3393 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3394 """Check that mirrors are not degraded.
3396 The ldisk parameter, if True, will change the test from the
3397 is_degraded attribute (which represents overall non-ok status for
3398 the device(s)) to the ldisk (representing the local storage status).
3401 lu.cfg.SetDiskID(dev, node)
3405 if on_primary or dev.AssembleOnSecondary():
3406 rstats = lu.rpc.call_blockdev_find(node, dev)
3407 msg = rstats.fail_msg
3409 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3411 elif not rstats.payload:
3412 lu.LogWarning("Can't find disk on node %s", node)
3416 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3418 result = result and not rstats.payload.is_degraded
3421 for child in dev.children:
3422 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3427 class LUOobCommand(NoHooksLU):
3428 """Logical unit for OOB handling.
3432 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3434 def CheckPrereq(self):
3435 """Check prerequisites.
3438 - the node exists in the configuration
3441 Any errors are signaled by raising errors.OpPrereqError.
3445 self.master_node = self.cfg.GetMasterNode()
3447 assert self.op.power_delay >= 0.0
3449 if self.op.node_names:
3450 if (self.op.command in self._SKIP_MASTER and
3451 self.master_node in self.op.node_names):
3452 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3453 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3455 if master_oob_handler:
3456 additional_text = ("run '%s %s %s' if you want to operate on the"
3457 " master regardless") % (master_oob_handler,
3461 additional_text = "it does not support out-of-band operations"
3463 raise errors.OpPrereqError(("Operating on the master node %s is not"
3464 " allowed for %s; %s") %
3465 (self.master_node, self.op.command,
3466 additional_text), errors.ECODE_INVAL)
3468 self.op.node_names = self.cfg.GetNodeList()
3469 if self.op.command in self._SKIP_MASTER:
3470 self.op.node_names.remove(self.master_node)
3472 if self.op.command in self._SKIP_MASTER:
3473 assert self.master_node not in self.op.node_names
3475 for node_name in self.op.node_names:
3476 node = self.cfg.GetNodeInfo(node_name)
3479 raise errors.OpPrereqError("Node %s not found" % node_name,
3482 self.nodes.append(node)
3484 if (not self.op.ignore_status and
3485 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3486 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3487 " not marked offline") % node_name,
3490 def ExpandNames(self):
3491 """Gather locks we need.
3494 if self.op.node_names:
3495 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3496 for name in self.op.node_names]
3497 lock_names = self.op.node_names
3499 lock_names = locking.ALL_SET
3501 self.needed_locks = {
3502 locking.LEVEL_NODE: lock_names,
3505 def Exec(self, feedback_fn):
3506 """Execute OOB and return result if we expect any.
3509 master_node = self.master_node
3512 for idx, node in enumerate(self.nodes):
3513 node_entry = [(constants.RS_NORMAL, node.name)]
3514 ret.append(node_entry)
3516 oob_program = _SupportsOob(self.cfg, node)
3519 node_entry.append((constants.RS_UNAVAIL, None))
3522 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3523 self.op.command, oob_program, node.name)
3524 result = self.rpc.call_run_oob(master_node, oob_program,
3525 self.op.command, node.name,
3529 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3530 node.name, result.fail_msg)
3531 node_entry.append((constants.RS_NODATA, None))
3534 self._CheckPayload(result)
3535 except errors.OpExecError, err:
3536 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3538 node_entry.append((constants.RS_NODATA, None))
3540 if self.op.command == constants.OOB_HEALTH:
3541 # For health we should log important events
3542 for item, status in result.payload:
3543 if status in [constants.OOB_STATUS_WARNING,
3544 constants.OOB_STATUS_CRITICAL]:
3545 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3546 item, node.name, status)
3548 if self.op.command == constants.OOB_POWER_ON:
3550 elif self.op.command == constants.OOB_POWER_OFF:
3551 node.powered = False
3552 elif self.op.command == constants.OOB_POWER_STATUS:
3553 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3554 if powered != node.powered:
3555 logging.warning(("Recorded power state (%s) of node '%s' does not"
3556 " match actual power state (%s)"), node.powered,
3559 # For configuration changing commands we should update the node
3560 if self.op.command in (constants.OOB_POWER_ON,
3561 constants.OOB_POWER_OFF):
3562 self.cfg.Update(node, feedback_fn)
3564 node_entry.append((constants.RS_NORMAL, result.payload))
3566 if (self.op.command == constants.OOB_POWER_ON and
3567 idx < len(self.nodes) - 1):
3568 time.sleep(self.op.power_delay)
3572 def _CheckPayload(self, result):
3573 """Checks if the payload is valid.
3575 @param result: RPC result
3576 @raises errors.OpExecError: If payload is not valid
3580 if self.op.command == constants.OOB_HEALTH:
3581 if not isinstance(result.payload, list):
3582 errs.append("command 'health' is expected to return a list but got %s" %
3583 type(result.payload))
3585 for item, status in result.payload:
3586 if status not in constants.OOB_STATUSES:
3587 errs.append("health item '%s' has invalid status '%s'" %
3590 if self.op.command == constants.OOB_POWER_STATUS:
3591 if not isinstance(result.payload, dict):
3592 errs.append("power-status is expected to return a dict but got %s" %
3593 type(result.payload))
3595 if self.op.command in [
3596 constants.OOB_POWER_ON,
3597 constants.OOB_POWER_OFF,
3598 constants.OOB_POWER_CYCLE,
3600 if result.payload is not None:
3601 errs.append("%s is expected to not return payload but got '%s'" %
3602 (self.op.command, result.payload))
3605 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3606 utils.CommaJoin(errs))
3608 class _OsQuery(_QueryBase):
3609 FIELDS = query.OS_FIELDS
3611 def ExpandNames(self, lu):
3612 # Lock all nodes in shared mode
3613 # Temporary removal of locks, should be reverted later
3614 # TODO: reintroduce locks when they are lighter-weight
3615 lu.needed_locks = {}
3616 #self.share_locks[locking.LEVEL_NODE] = 1
3617 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3619 # The following variables interact with _QueryBase._GetNames
3621 self.wanted = self.names
3623 self.wanted = locking.ALL_SET
3625 self.do_locking = self.use_locking
3627 def DeclareLocks(self, lu, level):
3631 def _DiagnoseByOS(rlist):
3632 """Remaps a per-node return list into an a per-os per-node dictionary
3634 @param rlist: a map with node names as keys and OS objects as values
3637 @return: a dictionary with osnames as keys and as value another
3638 map, with nodes as keys and tuples of (path, status, diagnose,
3639 variants, parameters, api_versions) as values, eg::
3641 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3642 (/srv/..., False, "invalid api")],
3643 "node2": [(/srv/..., True, "", [], [])]}
3648 # we build here the list of nodes that didn't fail the RPC (at RPC
3649 # level), so that nodes with a non-responding node daemon don't
3650 # make all OSes invalid
3651 good_nodes = [node_name for node_name in rlist
3652 if not rlist[node_name].fail_msg]
3653 for node_name, nr in rlist.items():
3654 if nr.fail_msg or not nr.payload:
3656 for (name, path, status, diagnose, variants,
3657 params, api_versions) in nr.payload:
3658 if name not in all_os:
3659 # build a list of nodes for this os containing empty lists
3660 # for each node in node_list
3662 for nname in good_nodes:
3663 all_os[name][nname] = []
3664 # convert params from [name, help] to (name, help)
3665 params = [tuple(v) for v in params]
3666 all_os[name][node_name].append((path, status, diagnose,
3667 variants, params, api_versions))
3670 def _GetQueryData(self, lu):
3671 """Computes the list of nodes and their attributes.
3674 # Locking is not used
3675 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3677 valid_nodes = [node.name
3678 for node in lu.cfg.GetAllNodesInfo().values()
3679 if not node.offline and node.vm_capable]
3680 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3681 cluster = lu.cfg.GetClusterInfo()
3685 for (os_name, os_data) in pol.items():
3686 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3687 hidden=(os_name in cluster.hidden_os),
3688 blacklisted=(os_name in cluster.blacklisted_os))
3692 api_versions = set()
3694 for idx, osl in enumerate(os_data.values()):
3695 info.valid = bool(info.valid and osl and osl[0][1])
3699 (node_variants, node_params, node_api) = osl[0][3:6]
3702 variants.update(node_variants)
3703 parameters.update(node_params)
3704 api_versions.update(node_api)
3706 # Filter out inconsistent values
3707 variants.intersection_update(node_variants)
3708 parameters.intersection_update(node_params)
3709 api_versions.intersection_update(node_api)
3711 info.variants = list(variants)
3712 info.parameters = list(parameters)
3713 info.api_versions = list(api_versions)
3715 data[os_name] = info
3717 # Prepare data in requested order
3718 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3722 class LUOsDiagnose(NoHooksLU):
3723 """Logical unit for OS diagnose/query.
3729 def _BuildFilter(fields, names):
3730 """Builds a filter for querying OSes.
3733 name_filter = qlang.MakeSimpleFilter("name", names)
3735 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3736 # respective field is not requested
3737 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3738 for fname in ["hidden", "blacklisted"]
3739 if fname not in fields]
3740 if "valid" not in fields:
3741 status_filter.append([qlang.OP_TRUE, "valid"])
3744 status_filter.insert(0, qlang.OP_AND)
3746 status_filter = None
3748 if name_filter and status_filter:
3749 return [qlang.OP_AND, name_filter, status_filter]
3753 return status_filter
3755 def CheckArguments(self):
3756 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3757 self.op.output_fields, False)
3759 def ExpandNames(self):
3760 self.oq.ExpandNames(self)
3762 def Exec(self, feedback_fn):
3763 return self.oq.OldStyleQuery(self)
3766 class LUNodeRemove(LogicalUnit):
3767 """Logical unit for removing a node.
3770 HPATH = "node-remove"
3771 HTYPE = constants.HTYPE_NODE
3773 def BuildHooksEnv(self):
3776 This doesn't run on the target node in the pre phase as a failed
3777 node would then be impossible to remove.
3781 "OP_TARGET": self.op.node_name,
3782 "NODE_NAME": self.op.node_name,
3785 def BuildHooksNodes(self):
3786 """Build hooks nodes.
3789 all_nodes = self.cfg.GetNodeList()
3791 all_nodes.remove(self.op.node_name)
3793 logging.warning("Node '%s', which is about to be removed, was not found"
3794 " in the list of all nodes", self.op.node_name)
3795 return (all_nodes, all_nodes)
3797 def CheckPrereq(self):
3798 """Check prerequisites.
3801 - the node exists in the configuration
3802 - it does not have primary or secondary instances
3803 - it's not the master
3805 Any errors are signaled by raising errors.OpPrereqError.
3808 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3809 node = self.cfg.GetNodeInfo(self.op.node_name)
3810 assert node is not None
3812 instance_list = self.cfg.GetInstanceList()
3814 masternode = self.cfg.GetMasterNode()
3815 if node.name == masternode:
3816 raise errors.OpPrereqError("Node is the master node, failover to another"
3817 " node is required", errors.ECODE_INVAL)
3819 for instance_name in instance_list:
3820 instance = self.cfg.GetInstanceInfo(instance_name)
3821 if node.name in instance.all_nodes:
3822 raise errors.OpPrereqError("Instance %s is still running on the node,"
3823 " please remove first" % instance_name,
3825 self.op.node_name = node.name
3828 def Exec(self, feedback_fn):
3829 """Removes the node from the cluster.
3833 logging.info("Stopping the node daemon and removing configs from node %s",
3836 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3838 # Promote nodes to master candidate as needed
3839 _AdjustCandidatePool(self, exceptions=[node.name])
3840 self.context.RemoveNode(node.name)
3842 # Run post hooks on the node before it's removed
3843 _RunPostHook(self, node.name)
3845 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3846 msg = result.fail_msg
3848 self.LogWarning("Errors encountered on the remote node while leaving"
3849 " the cluster: %s", msg)
3851 # Remove node from our /etc/hosts
3852 if self.cfg.GetClusterInfo().modify_etc_hosts:
3853 master_node = self.cfg.GetMasterNode()
3854 result = self.rpc.call_etc_hosts_modify(master_node,
3855 constants.ETC_HOSTS_REMOVE,
3857 result.Raise("Can't update hosts file with new host data")
3858 _RedistributeAncillaryFiles(self)
3861 class _NodeQuery(_QueryBase):
3862 FIELDS = query.NODE_FIELDS
3864 def ExpandNames(self, lu):
3865 lu.needed_locks = {}
3866 lu.share_locks[locking.LEVEL_NODE] = 1
3869 self.wanted = _GetWantedNodes(lu, self.names)
3871 self.wanted = locking.ALL_SET
3873 self.do_locking = (self.use_locking and
3874 query.NQ_LIVE in self.requested_data)
3877 # if we don't request only static fields, we need to lock the nodes
3878 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3880 def DeclareLocks(self, lu, level):
3883 def _GetQueryData(self, lu):
3884 """Computes the list of nodes and their attributes.
3887 all_info = lu.cfg.GetAllNodesInfo()
3889 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3891 # Gather data as requested
3892 if query.NQ_LIVE in self.requested_data:
3893 # filter out non-vm_capable nodes
3894 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3896 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3897 lu.cfg.GetHypervisorType())
3898 live_data = dict((name, nresult.payload)
3899 for (name, nresult) in node_data.items()
3900 if not nresult.fail_msg and nresult.payload)
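# nodes whose RPC call failed or returned an empty payload contribute no
# live data, so their live fields cannot be filled in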
3904 if query.NQ_INST in self.requested_data:
3905 node_to_primary = dict([(name, set()) for name in nodenames])
3906 node_to_secondary = dict([(name, set()) for name in nodenames])
3908 inst_data = lu.cfg.GetAllInstancesInfo()
3910 for inst in inst_data.values():
3911 if inst.primary_node in node_to_primary:
3912 node_to_primary[inst.primary_node].add(inst.name)
3913 for secnode in inst.secondary_nodes:
3914 if secnode in node_to_secondary:
3915 node_to_secondary[secnode].add(inst.name)
3917 node_to_primary = None
3918 node_to_secondary = None
3920 if query.NQ_OOB in self.requested_data:
3921 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3922 for name, node in all_info.iteritems())
3926 if query.NQ_GROUP in self.requested_data:
3927 groups = lu.cfg.GetAllNodeGroupsInfo()
3931 return query.NodeQueryData([all_info[name] for name in nodenames],
3932 live_data, lu.cfg.GetMasterNode(),
3933 node_to_primary, node_to_secondary, groups,
3934 oob_support, lu.cfg.GetClusterInfo())
3937 class LUNodeQuery(NoHooksLU):
3938 """Logical unit for querying nodes.
3941 # pylint: disable-msg=W0142
3944 def CheckArguments(self):
3945 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3946 self.op.output_fields, self.op.use_locking)
3948 def ExpandNames(self):
3949 self.nq.ExpandNames(self)
3951 def Exec(self, feedback_fn):
3952 return self.nq.OldStyleQuery(self)
3955 class LUNodeQueryvols(NoHooksLU):
3956 """Logical unit for getting volumes on node(s).
3960 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3961 _FIELDS_STATIC = utils.FieldSet("node")
3963 def CheckArguments(self):
3964 _CheckOutputFields(static=self._FIELDS_STATIC,
3965 dynamic=self._FIELDS_DYNAMIC,
3966 selected=self.op.output_fields)
3968 def ExpandNames(self):
3969 self.needed_locks = {}
3970 self.share_locks[locking.LEVEL_NODE] = 1
3971 if not self.op.nodes:
3972 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3974 self.needed_locks[locking.LEVEL_NODE] = \
3975 _GetWantedNodes(self, self.op.nodes)
3977 def Exec(self, feedback_fn):
3978 """Computes the list of nodes and their attributes.
3981 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3982 volumes = self.rpc.call_node_volumes(nodenames)
3984 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3985 in self.cfg.GetInstanceList()]
3987 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
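# lv_by_node maps each instance object to a {node: [lv names]} dict; it is
# used below to attribute a volume found on a node back to its owning instance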
3990 for node in nodenames:
3991 nresult = volumes[node]
3994 msg = nresult.fail_msg
3996 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3999 node_vols = nresult.payload[:]
4000 node_vols.sort(key=lambda vol: vol['dev'])
4002 for vol in node_vols:
4004 for field in self.op.output_fields:
4007 elif field == "phys":
4011 elif field == "name":
4013 elif field == "size":
4014 val = int(float(vol['size']))
4015 elif field == "instance":
4017 if node not in lv_by_node[inst]:
4019 if vol['name'] in lv_by_node[inst][node]:
4025 raise errors.ParameterError(field)
4026 node_output.append(str(val))
4028 output.append(node_output)
4033 class LUNodeQueryStorage(NoHooksLU):
4034 """Logical unit for getting information on storage units on node(s).
4037 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4040 def CheckArguments(self):
4041 _CheckOutputFields(static=self._FIELDS_STATIC,
4042 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4043 selected=self.op.output_fields)
4045 def ExpandNames(self):
4046 self.needed_locks = {}
4047 self.share_locks[locking.LEVEL_NODE] = 1
4050 self.needed_locks[locking.LEVEL_NODE] = \
4051 _GetWantedNodes(self, self.op.nodes)
4053 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4055 def Exec(self, feedback_fn):
4056 """Computes the list of nodes and their attributes.
4059 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4061 # Always get name to sort by
4062 if constants.SF_NAME in self.op.output_fields:
4063 fields = self.op.output_fields[:]
4065 fields = [constants.SF_NAME] + self.op.output_fields
4067 # Never ask for node or type as it's only known to the LU
4068 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4069 while extra in fields:
4070 fields.remove(extra)
4072 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4073 name_idx = field_idx[constants.SF_NAME]
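# remember which column holds the name: the rows returned by each node are
# keyed and sorted by it below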
4075 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4076 data = self.rpc.call_storage_list(self.nodes,
4077 self.op.storage_type, st_args,
4078 self.op.name, fields)
4082 for node in utils.NiceSort(self.nodes):
4083 nresult = data[node]
4087 msg = nresult.fail_msg
4089 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4092 rows = dict([(row[name_idx], row) for row in nresult.payload])
4094 for name in utils.NiceSort(rows.keys()):
4099 for field in self.op.output_fields:
4100 if field == constants.SF_NODE:
4102 elif field == constants.SF_TYPE:
4103 val = self.op.storage_type
4104 elif field in field_idx:
4105 val = row[field_idx[field]]
4107 raise errors.ParameterError(field)
4116 class _InstanceQuery(_QueryBase):
4117 FIELDS = query.INSTANCE_FIELDS
4119 def ExpandNames(self, lu):
4120 lu.needed_locks = {}
4121 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4122 lu.share_locks[locking.LEVEL_NODE] = 1
4125 self.wanted = _GetWantedInstances(lu, self.names)
4127 self.wanted = locking.ALL_SET
4129 self.do_locking = (self.use_locking and
4130 query.IQ_LIVE in self.requested_data)
4132 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4133 lu.needed_locks[locking.LEVEL_NODE] = []
4134 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4136 def DeclareLocks(self, lu, level):
4137 if level == locking.LEVEL_NODE and self.do_locking:
4138 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4140 def _GetQueryData(self, lu):
4141 """Computes the list of instances and their attributes.
4144 cluster = lu.cfg.GetClusterInfo()
4145 all_info = lu.cfg.GetAllInstancesInfo()
4147 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4149 instance_list = [all_info[name] for name in instance_names]
4150 nodes = frozenset(itertools.chain(*(inst.all_nodes
4151 for inst in instance_list)))
4152 hv_list = list(set([inst.hypervisor for inst in instance_list]))
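# restrict the live-data RPC below to the nodes and hypervisors actually used
# by the selected instances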
4155 wrongnode_inst = set()
4157 # Gather data as requested
4158 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4160 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4162 result = node_data[name]
4164 # offline nodes will be in both lists
4165 assert result.fail_msg
4166 offline_nodes.append(name)
4168 bad_nodes.append(name)
4169 elif result.payload:
4170 for inst in result.payload:
4171 if inst in all_info:
4172 if all_info[inst].primary_node == name:
4173 live_data.update(result.payload)
4175 wrongnode_inst.add(inst)
4177 # orphan instance; we don't list it here as we don't
4178 # handle this case yet in the output of instance listing
4179 logging.warning("Orphan instance '%s' found on node %s",
4181 # else no instance is alive
4185 if query.IQ_DISKUSAGE in self.requested_data:
4186 disk_usage = dict((inst.name,
4187 _ComputeDiskSize(inst.disk_template,
4188 [{constants.IDISK_SIZE: disk.size}
4189 for disk in inst.disks]))
4190 for inst in instance_list)
4194 if query.IQ_CONSOLE in self.requested_data:
4196 for inst in instance_list:
4197 if inst.name in live_data:
4198 # Instance is running
4199 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4201 consinfo[inst.name] = None
4202 assert set(consinfo.keys()) == set(instance_names)
4206 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4207 disk_usage, offline_nodes, bad_nodes,
4208 live_data, wrongnode_inst, consinfo)
4211 class LUQuery(NoHooksLU):
4212 """Query for resources/items of a certain kind.
4215 # pylint: disable-msg=W0142
4218 def CheckArguments(self):
4219 qcls = _GetQueryImplementation(self.op.what)
4221 self.impl = qcls(self.op.filter, self.op.fields, False)
4223 def ExpandNames(self):
4224 self.impl.ExpandNames(self)
4226 def DeclareLocks(self, level):
4227 self.impl.DeclareLocks(self, level)
4229 def Exec(self, feedback_fn):
4230 return self.impl.NewStyleQuery(self)
4233 class LUQueryFields(NoHooksLU):
4234 """Query for resources/items of a certain kind.
4237 # pylint: disable-msg=W0142
4240 def CheckArguments(self):
4241 self.qcls = _GetQueryImplementation(self.op.what)
4243 def ExpandNames(self):
4244 self.needed_locks = {}
4246 def Exec(self, feedback_fn):
4247 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4250 class LUNodeModifyStorage(NoHooksLU):
4251 """Logical unit for modifying a storage volume on a node.
4256 def CheckArguments(self):
4257 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4259 storage_type = self.op.storage_type
4262 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4264 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
4265 " modified" % storage_type,
4268 diff = set(self.op.changes.keys()) - modifiable
4270 raise errors.OpPrereqError("The following fields cannot be modified for"
4271 " storage units of type '%s': %r" %
4272 (storage_type, list(diff)),
4275 def ExpandNames(self):
4276 self.needed_locks = {
4277 locking.LEVEL_NODE: self.op.node_name,
4280 def Exec(self, feedback_fn):
4281 """Computes the list of nodes and their attributes.
4284 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4285 result = self.rpc.call_storage_modify(self.op.node_name,
4286 self.op.storage_type, st_args,
4287 self.op.name, self.op.changes)
4288 result.Raise("Failed to modify storage unit '%s' on %s" %
4289 (self.op.name, self.op.node_name))
4292 class LUNodeAdd(LogicalUnit):
4293 """Logical unit for adding node to the cluster.
4297 HTYPE = constants.HTYPE_NODE
4298 _NFLAGS = ["master_capable", "vm_capable"]
4300 def CheckArguments(self):
4301 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4302 # validate/normalize the node name
4303 self.hostname = netutils.GetHostname(name=self.op.node_name,
4304 family=self.primary_ip_family)
4305 self.op.node_name = self.hostname.name
4307 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4308 raise errors.OpPrereqError("Cannot readd the master node",
4311 if self.op.readd and self.op.group:
4312 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4313 " being readded", errors.ECODE_INVAL)
4315 def BuildHooksEnv(self):
4318 This will run on all nodes before, and on all nodes + the new node after.
4322 "OP_TARGET": self.op.node_name,
4323 "NODE_NAME": self.op.node_name,
4324 "NODE_PIP": self.op.primary_ip,
4325 "NODE_SIP": self.op.secondary_ip,
4326 "MASTER_CAPABLE": str(self.op.master_capable),
4327 "VM_CAPABLE": str(self.op.vm_capable),
4330 def BuildHooksNodes(self):
4331 """Build hooks nodes.
4334 # Exclude added node
4335 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4336 post_nodes = pre_nodes + [self.op.node_name, ]
4338 return (pre_nodes, post_nodes)
4340 def CheckPrereq(self):
4341 """Check prerequisites.
4344 - the new node is not already in the config
4346 - its parameters (single/dual homed) match the cluster
4348 Any errors are signaled by raising errors.OpPrereqError.
4352 hostname = self.hostname
4353 node = hostname.name
4354 primary_ip = self.op.primary_ip = hostname.ip
4355 if self.op.secondary_ip is None:
4356 if self.primary_ip_family == netutils.IP6Address.family:
4357 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4358 " IPv4 address must be given as secondary",
4360 self.op.secondary_ip = primary_ip
4362 secondary_ip = self.op.secondary_ip
4363 if not netutils.IP4Address.IsValid(secondary_ip):
4364 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4365 " address" % secondary_ip, errors.ECODE_INVAL)
4367 node_list = cfg.GetNodeList()
4368 if not self.op.readd and node in node_list:
4369 raise errors.OpPrereqError("Node %s is already in the configuration" %
4370 node, errors.ECODE_EXISTS)
4371 elif self.op.readd and node not in node_list:
4372 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4375 self.changed_primary_ip = False
4377 for existing_node_name in node_list:
4378 existing_node = cfg.GetNodeInfo(existing_node_name)
4380 if self.op.readd and node == existing_node_name:
4381 if existing_node.secondary_ip != secondary_ip:
4382 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4383 " address configuration as before",
4385 if existing_node.primary_ip != primary_ip:
4386 self.changed_primary_ip = True
4390 if (existing_node.primary_ip == primary_ip or
4391 existing_node.secondary_ip == primary_ip or
4392 existing_node.primary_ip == secondary_ip or
4393 existing_node.secondary_ip == secondary_ip):
4394 raise errors.OpPrereqError("New node ip address(es) conflict with"
4395 " existing node %s" % existing_node.name,
4396 errors.ECODE_NOTUNIQUE)
4398 # After this 'if' block, None is no longer a valid value for the
4399 # _capable op attributes
4401 old_node = self.cfg.GetNodeInfo(node)
4402 assert old_node is not None, "Can't retrieve locked node %s" % node
4403 for attr in self._NFLAGS:
4404 if getattr(self.op, attr) is None:
4405 setattr(self.op, attr, getattr(old_node, attr))
4407 for attr in self._NFLAGS:
4408 if getattr(self.op, attr) is None:
4409 setattr(self.op, attr, True)
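# for a plain add, unspecified capability flags default to True; for a readd
# they were copied from the existing node object above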
4411 if self.op.readd and not self.op.vm_capable:
4412 pri, sec = cfg.GetNodeInstances(node)
4414 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4415 " flag set to false, but it already holds"
4416 " instances" % node,
4419 # check that the type of the node (single versus dual homed) is the
4420 # same as for the master
4421 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4422 master_singlehomed = myself.secondary_ip == myself.primary_ip
4423 newbie_singlehomed = secondary_ip == primary_ip
4424 if master_singlehomed != newbie_singlehomed:
4425 if master_singlehomed:
4426 raise errors.OpPrereqError("The master has no secondary ip but the"
4427 " new node has one",
4430 raise errors.OpPrereqError("The master has a secondary ip but the"
4431 " new node doesn't have one",
4434 # checks reachability
4435 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4436 raise errors.OpPrereqError("Node not reachable by ping",
4437 errors.ECODE_ENVIRON)
4439 if not newbie_singlehomed:
4440 # check reachability from my secondary ip to newbie's secondary ip
4441 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4442 source=myself.secondary_ip):
4443 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4444 " based ping to node daemon port",
4445 errors.ECODE_ENVIRON)
4452 if self.op.master_capable:
4453 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4455 self.master_candidate = False
4458 self.new_node = old_node
4460 node_group = cfg.LookupNodeGroup(self.op.group)
4461 self.new_node = objects.Node(name=node,
4462 primary_ip=primary_ip,
4463 secondary_ip=secondary_ip,
4464 master_candidate=self.master_candidate,
4465 offline=False, drained=False,
4468 if self.op.ndparams:
4469 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4471 def Exec(self, feedback_fn):
4472 """Adds the new node to the cluster.
4475 new_node = self.new_node
4476 node = new_node.name
4478 # We are adding a new node, so we assume it is powered
4479 new_node.powered = True
4481 # for re-adds, reset the offline/drained/master-candidate flags;
4482 # we need to reset here, otherwise offline would prevent RPC calls
4483 # later in the procedure; this also means that if the re-add
4484 # fails, we are left with a non-offlined, broken node
4486 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4487 self.LogInfo("Readding a node, the offline/drained flags were reset")
4488 # if we demote the node, we do cleanup later in the procedure
4489 new_node.master_candidate = self.master_candidate
4490 if self.changed_primary_ip:
4491 new_node.primary_ip = self.op.primary_ip
4493 # copy the master/vm_capable flags
4494 for attr in self._NFLAGS:
4495 setattr(new_node, attr, getattr(self.op, attr))
4497 # notify the user about any possible mc promotion
4498 if new_node.master_candidate:
4499 self.LogInfo("Node will be a master candidate")
4501 if self.op.ndparams:
4502 new_node.ndparams = self.op.ndparams
4504 new_node.ndparams = {}
4506 # check connectivity
4507 result = self.rpc.call_version([node])[node]
4508 result.Raise("Can't get version information from node %s" % node)
4509 if constants.PROTOCOL_VERSION == result.payload:
4510 logging.info("Communication to node %s fine, sw version %s match",
4511 node, result.payload)
4513 raise errors.OpExecError("Version mismatch master version %s,"
4514 " node version %s" %
4515 (constants.PROTOCOL_VERSION, result.payload))
4517 # Add node to our /etc/hosts, and add key to known_hosts
4518 if self.cfg.GetClusterInfo().modify_etc_hosts:
4519 master_node = self.cfg.GetMasterNode()
4520 result = self.rpc.call_etc_hosts_modify(master_node,
4521 constants.ETC_HOSTS_ADD,
4524 result.Raise("Can't update hosts file with new host data")
4526 if new_node.secondary_ip != new_node.primary_ip:
4527 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4530 node_verify_list = [self.cfg.GetMasterNode()]
4531 node_verify_param = {
4532 constants.NV_NODELIST: [node],
4533 # TODO: do a node-net-test as well?
4536 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4537 self.cfg.GetClusterName())
4538 for verifier in node_verify_list:
4539 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4540 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4542 for failed in nl_payload:
4543 feedback_fn("ssh/hostname verification failed"
4544 " (checking from %s): %s" %
4545 (verifier, nl_payload[failed]))
4546 raise errors.OpExecError("ssh/hostname verification failed")
4549 _RedistributeAncillaryFiles(self)
4550 self.context.ReaddNode(new_node)
4551 # make sure we redistribute the config
4552 self.cfg.Update(new_node, feedback_fn)
4553 # and make sure the new node will not have old files around
4554 if not new_node.master_candidate:
4555 result = self.rpc.call_node_demote_from_mc(new_node.name)
4556 msg = result.fail_msg
4558 self.LogWarning("Node failed to demote itself from master"
4559 " candidate status: %s" % msg)
4561 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4562 additional_vm=self.op.vm_capable)
4563 self.context.AddNode(new_node, self.proc.GetECId())
4566 class LUNodeSetParams(LogicalUnit):
4567 """Modifies the parameters of a node.
4569 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4570 to the node role (as _ROLE_*)
4571 @cvar _R2F: a dictionary from node role to tuples of flags
4572 @cvar _FLAGS: a list of attribute names corresponding to the flags
4575 HPATH = "node-modify"
4576 HTYPE = constants.HTYPE_NODE
4578 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4580 (True, False, False): _ROLE_CANDIDATE,
4581 (False, True, False): _ROLE_DRAINED,
4582 (False, False, True): _ROLE_OFFLINE,
4583 (False, False, False): _ROLE_REGULAR,
4585 _R2F = dict((v, k) for k, v in _F2R.items())
4586 _FLAGS = ["master_candidate", "drained", "offline"]
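# example: a drained node has the flag tuple (False, True, False), which _F2R
# maps to _ROLE_DRAINED; _R2F provides the reverse mapping used when applying
# a new role in Exec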
4588 def CheckArguments(self):
4589 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4590 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4591 self.op.master_capable, self.op.vm_capable,
4592 self.op.secondary_ip, self.op.ndparams]
4593 if all_mods.count(None) == len(all_mods):
4594 raise errors.OpPrereqError("Please pass at least one modification",
4596 if all_mods.count(True) > 1:
4597 raise errors.OpPrereqError("Can't set the node into more than one"
4598 " state at the same time",
4601 # Boolean value that tells us whether we might be demoting from MC
4602 self.might_demote = (self.op.master_candidate == False or
4603 self.op.offline == True or
4604 self.op.drained == True or
4605 self.op.master_capable == False)
4607 if self.op.secondary_ip:
4608 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4609 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4610 " address" % self.op.secondary_ip,
4613 self.lock_all = self.op.auto_promote and self.might_demote
4614 self.lock_instances = self.op.secondary_ip is not None
4616 def ExpandNames(self):
4618 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4620 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4622 if self.lock_instances:
4623 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4625 def DeclareLocks(self, level):
4626 # If we have locked all instances, before waiting to lock nodes, release
4627 # all the ones living on nodes unrelated to the current operation.
4628 if level == locking.LEVEL_NODE and self.lock_instances:
4629 self.affected_instances = []
4630 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4633 # Build list of instances to release
4634 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4635 instance = self.context.cfg.GetInstanceInfo(instance_name)
4636 if (instance.disk_template in constants.DTS_INT_MIRROR and
4637 self.op.node_name in instance.all_nodes):
4638 instances_keep.append(instance_name)
4639 self.affected_instances.append(instance)
4641 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4643 assert (set(self.acquired_locks.get(locking.LEVEL_INSTANCE, [])) ==
4644 set(instances_keep))
4646 def BuildHooksEnv(self):
4649 This runs on the master node.
4653 "OP_TARGET": self.op.node_name,
4654 "MASTER_CANDIDATE": str(self.op.master_candidate),
4655 "OFFLINE": str(self.op.offline),
4656 "DRAINED": str(self.op.drained),
4657 "MASTER_CAPABLE": str(self.op.master_capable),
4658 "VM_CAPABLE": str(self.op.vm_capable),
4661 def BuildHooksNodes(self):
4662 """Build hooks nodes.
4665 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4668 def CheckPrereq(self):
4669 """Check prerequisites.
4671 This only checks the instance list against the existing names.
4674 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4676 if (self.op.master_candidate is not None or
4677 self.op.drained is not None or
4678 self.op.offline is not None):
4679 # we can't change the master's node flags
4680 if self.op.node_name == self.cfg.GetMasterNode():
4681 raise errors.OpPrereqError("The master role can be changed"
4682 " only via master-failover",
4685 if self.op.master_candidate and not node.master_capable:
4686 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4687 " it a master candidate" % node.name,
4690 if self.op.vm_capable == False:
4691 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4693 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4694 " the vm_capable flag" % node.name,
4697 if node.master_candidate and self.might_demote and not self.lock_all:
4698 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4699 # check if after removing the current node, we're missing master candidates
4701 (mc_remaining, mc_should, _) = \
4702 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4703 if mc_remaining < mc_should:
4704 raise errors.OpPrereqError("Not enough master candidates, please"
4705 " pass auto promote option to allow"
4706 " promotion", errors.ECODE_STATE)
4708 self.old_flags = old_flags = (node.master_candidate,
4709 node.drained, node.offline)
4710 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4711 self.old_role = old_role = self._F2R[old_flags]
4713 # Check for ineffective changes
4714 for attr in self._FLAGS:
4715 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4716 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4717 setattr(self.op, attr, None)
4719 # Past this point, any flag change to False means a transition
4720 # away from the respective state, as only real changes are kept
4722 # TODO: We might query the real power state if it supports OOB
4723 if _SupportsOob(self.cfg, node):
4724 if self.op.offline is False and not (node.powered or
4725 self.op.powered == True):
4726 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4727 " offline status can be reset") %
4729 elif self.op.powered is not None:
4730 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4731 " as it does not support out-of-band"
4732 " handling") % self.op.node_name)
4734 # If we're being de-offlined/un-drained (or made master capable), promote ourselves to master candidate if needed
4735 if (self.op.drained == False or self.op.offline == False or
4736 (self.op.master_capable and not node.master_capable)):
4737 if _DecideSelfPromotion(self):
4738 self.op.master_candidate = True
4739 self.LogInfo("Auto-promoting node to master candidate")
4741 # If we're no longer master capable, we'll demote ourselves from MC
4742 if self.op.master_capable == False and node.master_candidate:
4743 self.LogInfo("Demoting from master candidate")
4744 self.op.master_candidate = False
4747 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4748 if self.op.master_candidate:
4749 new_role = self._ROLE_CANDIDATE
4750 elif self.op.drained:
4751 new_role = self._ROLE_DRAINED
4752 elif self.op.offline:
4753 new_role = self._ROLE_OFFLINE
4754 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4755 # False is still in new flags, which means we're un-setting (the
4757 new_role = self._ROLE_REGULAR
4758 else: # no new flags, nothing, keep old role
4761 self.new_role = new_role
4763 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4764 # Trying to transition out of offline status
4765 result = self.rpc.call_version([node.name])[node.name]
4767 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4768 " to report its version: %s" %
4769 (node.name, result.fail_msg),
4772 self.LogWarning("Transitioning node from offline to online state"
4773 " without using re-add. Please make sure the node"
4776 if self.op.secondary_ip:
4777 # Ok even without locking, because this can't be changed by any LU
4778 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4779 master_singlehomed = master.secondary_ip == master.primary_ip
4780 if master_singlehomed and self.op.secondary_ip:
4781 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4782 " homed cluster", errors.ECODE_INVAL)
4785 if self.affected_instances:
4786 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4787 " node has instances (%s) configured"
4788 " to use it" % self.affected_instances)
4790 # On online nodes, check that no instances are running, and that
4791 # the node has the new ip and we can reach it.
4792 for instance in self.affected_instances:
4793 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4795 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4796 if master.name != node.name:
4797 # check reachability from master secondary ip to new secondary ip
4798 if not netutils.TcpPing(self.op.secondary_ip,
4799 constants.DEFAULT_NODED_PORT,
4800 source=master.secondary_ip):
4801 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4802 " based ping to node daemon port",
4803 errors.ECODE_ENVIRON)
4805 if self.op.ndparams:
4806 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4807 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4808 self.new_ndparams = new_ndparams
4810 def Exec(self, feedback_fn):
4815 old_role = self.old_role
4816 new_role = self.new_role
4820 if self.op.ndparams:
4821 node.ndparams = self.new_ndparams
4823 if self.op.powered is not None:
4824 node.powered = self.op.powered
4826 for attr in ["master_capable", "vm_capable"]:
4827 val = getattr(self.op, attr)
4829 setattr(node, attr, val)
4830 result.append((attr, str(val)))
4832 if new_role != old_role:
4833 # Tell the node to demote itself, if no longer MC and not offline
4834 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4835 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4837 self.LogWarning("Node failed to demote itself: %s", msg)
4839 new_flags = self._R2F[new_role]
4840 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4842 result.append((desc, str(nf)))
4843 (node.master_candidate, node.drained, node.offline) = new_flags
4845 # we locked all nodes, we adjust the CP before updating this node
4847 _AdjustCandidatePool(self, [node.name])
4849 if self.op.secondary_ip:
4850 node.secondary_ip = self.op.secondary_ip
4851 result.append(("secondary_ip", self.op.secondary_ip))
4853 # this will trigger configuration file update, if needed
4854 self.cfg.Update(node, feedback_fn)
4856 # this will trigger job queue propagation or cleanup if the mc
4858 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4859 self.context.ReaddNode(node)
4864 class LUNodePowercycle(NoHooksLU):
4865 """Powercycles a node.
4870 def CheckArguments(self):
4871 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4872 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4873 raise errors.OpPrereqError("The node is the master and the force"
4874 " parameter was not set",
4877 def ExpandNames(self):
4878 """Locking for PowercycleNode.
4880 This is a last-resort option and shouldn't block on other
4881 jobs. Therefore, we grab no locks.
4884 self.needed_locks = {}
4886 def Exec(self, feedback_fn):
4890 result = self.rpc.call_node_powercycle(self.op.node_name,
4891 self.cfg.GetHypervisorType())
4892 result.Raise("Failed to schedule the reboot")
4893 return result.payload
4896 class LUClusterQuery(NoHooksLU):
4897 """Query cluster configuration.
4902 def ExpandNames(self):
4903 self.needed_locks = {}
4905 def Exec(self, feedback_fn):
4906 """Return cluster config.
4909 cluster = self.cfg.GetClusterInfo()
4912 # Filter just for enabled hypervisors
4913 for os_name, hv_dict in cluster.os_hvp.items():
4914 os_hvp[os_name] = {}
4915 for hv_name, hv_params in hv_dict.items():
4916 if hv_name in cluster.enabled_hypervisors:
4917 os_hvp[os_name][hv_name] = hv_params
4919 # Convert ip_family to ip_version
4920 primary_ip_version = constants.IP4_VERSION
4921 if cluster.primary_ip_family == netutils.IP6Address.family:
4922 primary_ip_version = constants.IP6_VERSION
4925 "software_version": constants.RELEASE_VERSION,
4926 "protocol_version": constants.PROTOCOL_VERSION,
4927 "config_version": constants.CONFIG_VERSION,
4928 "os_api_version": max(constants.OS_API_VERSIONS),
4929 "export_version": constants.EXPORT_VERSION,
4930 "architecture": (platform.architecture()[0], platform.machine()),
4931 "name": cluster.cluster_name,
4932 "master": cluster.master_node,
4933 "default_hypervisor": cluster.enabled_hypervisors[0],
4934 "enabled_hypervisors": cluster.enabled_hypervisors,
4935 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4936 for hypervisor_name in cluster.enabled_hypervisors]),
4938 "beparams": cluster.beparams,
4939 "osparams": cluster.osparams,
4940 "nicparams": cluster.nicparams,
4941 "ndparams": cluster.ndparams,
4942 "candidate_pool_size": cluster.candidate_pool_size,
4943 "master_netdev": cluster.master_netdev,
4944 "volume_group_name": cluster.volume_group_name,
4945 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4946 "file_storage_dir": cluster.file_storage_dir,
4947 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4948 "maintain_node_health": cluster.maintain_node_health,
4949 "ctime": cluster.ctime,
4950 "mtime": cluster.mtime,
4951 "uuid": cluster.uuid,
4952 "tags": list(cluster.GetTags()),
4953 "uid_pool": cluster.uid_pool,
4954 "default_iallocator": cluster.default_iallocator,
4955 "reserved_lvs": cluster.reserved_lvs,
4956 "primary_ip_version": primary_ip_version,
4957 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4958 "hidden_os": cluster.hidden_os,
4959 "blacklisted_os": cluster.blacklisted_os,
4965 class LUClusterConfigQuery(NoHooksLU):
4966 """Return configuration values.
4970 _FIELDS_DYNAMIC = utils.FieldSet()
4971 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4972 "watcher_pause", "volume_group_name")
4974 def CheckArguments(self):
4975 _CheckOutputFields(static=self._FIELDS_STATIC,
4976 dynamic=self._FIELDS_DYNAMIC,
4977 selected=self.op.output_fields)
4979 def ExpandNames(self):
4980 self.needed_locks = {}
4982 def Exec(self, feedback_fn):
4983 """Dump a representation of the cluster config to the standard output.
4987 for field in self.op.output_fields:
4988 if field == "cluster_name":
4989 entry = self.cfg.GetClusterName()
4990 elif field == "master_node":
4991 entry = self.cfg.GetMasterNode()
4992 elif field == "drain_flag":
4993 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4994 elif field == "watcher_pause":
4995 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4996 elif field == "volume_group_name":
4997 entry = self.cfg.GetVGName()
4999 raise errors.ParameterError(field)
5000 values.append(entry)
5004 class LUInstanceActivateDisks(NoHooksLU):
5005 """Bring up an instance's disks.
5010 def ExpandNames(self):
5011 self._ExpandAndLockInstance()
5012 self.needed_locks[locking.LEVEL_NODE] = []
5013 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5015 def DeclareLocks(self, level):
5016 if level == locking.LEVEL_NODE:
5017 self._LockInstancesNodes()
5019 def CheckPrereq(self):
5020 """Check prerequisites.
5022 This checks that the instance is in the cluster.
5025 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5026 assert self.instance is not None, \
5027 "Cannot retrieve locked instance %s" % self.op.instance_name
5028 _CheckNodeOnline(self, self.instance.primary_node)
5030 def Exec(self, feedback_fn):
5031 """Activate the disks.
5034 disks_ok, disks_info = \
5035 _AssembleInstanceDisks(self, self.instance,
5036 ignore_size=self.op.ignore_size)
5038 raise errors.OpExecError("Cannot activate block devices")
5043 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5045 """Prepare the block devices for an instance.
5047 This sets up the block devices on all nodes.
5049 @type lu: L{LogicalUnit}
5050 @param lu: the logical unit on whose behalf we execute
5051 @type instance: L{objects.Instance}
5052 @param instance: the instance for whose disks we assemble
5053 @type disks: list of L{objects.Disk} or None
5054 @param disks: which disks to assemble (or all, if None)
5055 @type ignore_secondaries: boolean
5056 @param ignore_secondaries: if true, errors on secondary nodes
5057 won't result in an error return from the function
5058 @type ignore_size: boolean
5059 @param ignore_size: if true, the current known size of the disk
5060 will not be used during the disk activation, useful for cases
5061 when the size is wrong
5062 @return: a pair (disks_ok, device_info), where device_info is a list of
5063 (host, instance_visible_name, node_visible_name) tuples
5064 with the mapping from node devices to instance devices
5069 iname = instance.name
5070 disks = _ExpandCheckDisks(instance, disks)
5072 # With the two-pass mechanism we try to reduce the window of
5073 # opportunity for the race condition of switching DRBD to primary
5074 # before handshaking has occurred, but we do not eliminate it
5076 # The proper fix would be to wait (with some limits) until the
5077 # connection has been made and drbd transitions from WFConnection
5078 # into any other network-connected state (Connected, SyncTarget,
5081 # 1st pass, assemble on all nodes in secondary mode
5082 for idx, inst_disk in enumerate(disks):
5083 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5085 node_disk = node_disk.Copy()
5086 node_disk.UnsetSize()
5087 lu.cfg.SetDiskID(node_disk, node)
5088 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5089 msg = result.fail_msg
5091 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5092 " (is_primary=False, pass=1): %s",
5093 inst_disk.iv_name, node, msg)
5094 if not ignore_secondaries:
5097 # FIXME: race condition on drbd migration to primary
5099 # 2nd pass, do only the primary node
5100 for idx, inst_disk in enumerate(disks):
5103 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5104 if node != instance.primary_node:
5107 node_disk = node_disk.Copy()
5108 node_disk.UnsetSize()
5109 lu.cfg.SetDiskID(node_disk, node)
5110 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5111 msg = result.fail_msg
5113 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5114 " (is_primary=True, pass=2): %s",
5115 inst_disk.iv_name, node, msg)
5118 dev_path = result.payload
5120 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5122 # leave the disks configured for the primary node
5123 # this is a workaround that would be fixed better by
5124 # improving the logical/physical id handling
5126 lu.cfg.SetDiskID(disk, instance.primary_node)
5128 return disks_ok, device_info
5131 def _StartInstanceDisks(lu, instance, force):
5132 """Start the disks of an instance.
5135 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5136 ignore_secondaries=force)
5138 _ShutdownInstanceDisks(lu, instance)
5139 if force is not None and not force:
5140 lu.proc.LogWarning("", hint="If the message above refers to a"
5142 " you can retry the operation using '--force'.")
5143 raise errors.OpExecError("Disk consistency error")
5146 class LUInstanceDeactivateDisks(NoHooksLU):
5147 """Shutdown an instance's disks.
5152 def ExpandNames(self):
5153 self._ExpandAndLockInstance()
5154 self.needed_locks[locking.LEVEL_NODE] = []
5155 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5157 def DeclareLocks(self, level):
5158 if level == locking.LEVEL_NODE:
5159 self._LockInstancesNodes()
5161 def CheckPrereq(self):
5162 """Check prerequisites.
5164 This checks that the instance is in the cluster.
5167 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5168 assert self.instance is not None, \
5169 "Cannot retrieve locked instance %s" % self.op.instance_name
5171 def Exec(self, feedback_fn):
5172 """Deactivate the disks
5175 instance = self.instance
5177 _ShutdownInstanceDisks(self, instance)
5179 _SafeShutdownInstanceDisks(self, instance)
5182 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5183 """Shutdown block devices of an instance.
5185 This function checks if an instance is running before calling
5186 _ShutdownInstanceDisks.
5189 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5190 _ShutdownInstanceDisks(lu, instance, disks=disks)
5193 def _ExpandCheckDisks(instance, disks):
5194 """Return the instance disks selected by the disks list
5196 @type disks: list of L{objects.Disk} or None
5197 @param disks: selected disks
5198 @rtype: list of L{objects.Disk}
5199 @return: selected instance disks to act on
5203 return instance.disks
5205 if not set(disks).issubset(instance.disks):
5206 raise errors.ProgrammerError("Can only act on disks belonging to the"
5211 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5212 """Shutdown block devices of an instance.
5214 This does the shutdown on all nodes of the instance.
5216 If ignore_primary is false, errors on the primary node make the function report failure.
5221 disks = _ExpandCheckDisks(instance, disks)
5224 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5225 lu.cfg.SetDiskID(top_disk, node)
5226 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5227 msg = result.fail_msg
5229 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5230 disk.iv_name, node, msg)
5231 if ((node == instance.primary_node and not ignore_primary) or
5232 (node != instance.primary_node and not result.offline)):
5237 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5238 """Checks if a node has enough free memory.
5240 This function checks if a given node has the needed amount of free
5241 memory. In case the node has less memory or we cannot get the
5242 information from the node, this function raises an OpPrereqError
5245 @type lu: C{LogicalUnit}
5246 @param lu: a logical unit from which we get configuration data
5248 @param node: the node to check
5249 @type reason: C{str}
5250 @param reason: string to use in the error message
5251 @type requested: C{int}
5252 @param requested: the amount of memory in MiB to check for
5253 @type hypervisor_name: C{str}
5254 @param hypervisor_name: the hypervisor to ask for memory stats
5255 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5256 we cannot check the node
5259 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5260 nodeinfo[node].Raise("Can't get data from node %s" % node,
5261 prereq=True, ecode=errors.ECODE_ENVIRON)
5262 free_mem = nodeinfo[node].payload.get('memory_free', None)
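# the payload comes from the remote node, so validate that memory_free is
# really an integer before comparing it with the requested amount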
5263 if not isinstance(free_mem, int):
5264 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5265 " was '%s'" % (node, free_mem),
5266 errors.ECODE_ENVIRON)
5267 if requested > free_mem:
5268 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5269 " needed %s MiB, available %s MiB" %
5270 (node, reason, requested, free_mem),
5274 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5275 """Checks if nodes have enough free disk space in the all VGs.
5277 This function check if all given nodes have the needed amount of
5278 free disk. In case any node has less disk or we cannot get the
5279 information from the node, this function raise an OpPrereqError
5282 @type lu: C{LogicalUnit}
5283 @param lu: a logical unit from which we get configuration data
5284 @type nodenames: C{list}
5285 @param nodenames: the list of node names to check
5286 @type req_sizes: C{dict}
5287 @param req_sizes: the hash of vg and corresponding amount of disk in
5289 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5290 or we cannot check the node
5293 for vg, req_size in req_sizes.items():
5294 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5297 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5298 """Checks if nodes have enough free disk space in the specified VG.
5300 This function checks if all given nodes have the needed amount of
5301 free disk. In case any node has less disk or we cannot get the
5302 information from the node, this function raises an OpPrereqError
5305 @type lu: C{LogicalUnit}
5306 @param lu: a logical unit from which we get configuration data
5307 @type nodenames: C{list}
5308 @param nodenames: the list of node names to check
5310 @param vg: the volume group to check
5311 @type requested: C{int}
5312 @param requested: the amount of disk in MiB to check for
5313 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5314 or we cannot check the node
5317 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5318 for node in nodenames:
5319 info = nodeinfo[node]
5320 info.Raise("Cannot get current information from node %s" % node,
5321 prereq=True, ecode=errors.ECODE_ENVIRON)
5322 vg_free = info.payload.get("vg_free", None)
5323 if not isinstance(vg_free, int):
5324 raise errors.OpPrereqError("Can't compute free disk space on node"
5325 " %s for vg %s, result was '%s'" %
5326 (node, vg, vg_free), errors.ECODE_ENVIRON)
5327 if requested > vg_free:
5328 raise errors.OpPrereqError("Not enough disk space on target node %s"
5329 " vg %s: required %d MiB, available %d MiB" %
5330 (node, vg, requested, vg_free),
5334 class LUInstanceStartup(LogicalUnit):
5335 """Starts an instance.
5338 HPATH = "instance-start"
5339 HTYPE = constants.HTYPE_INSTANCE
5342 def CheckArguments(self):
5344 if self.op.beparams:
5345 # fill the beparams dict
5346 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5348 def ExpandNames(self):
5349 self._ExpandAndLockInstance()
5351 def BuildHooksEnv(self):
5354 This runs on master, primary and secondary nodes of the instance.
5358 "FORCE": self.op.force,
5361 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5365 def BuildHooksNodes(self):
5366 """Build hooks nodes.
5369 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5372 def CheckPrereq(self):
5373 """Check prerequisites.
5375 This checks that the instance is in the cluster.
5378 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5379 assert self.instance is not None, \
5380 "Cannot retrieve locked instance %s" % self.op.instance_name
5383 if self.op.hvparams:
5384 # check hypervisor parameter syntax (locally)
5385 cluster = self.cfg.GetClusterInfo()
5386 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5387 filled_hvp = cluster.FillHV(instance)
5388 filled_hvp.update(self.op.hvparams)
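# the opcode's hvparams are layered on top of the cluster/instance defaults;
# the combined dict is what gets validated locally and on the nodes below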
5389 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5390 hv_type.CheckParameterSyntax(filled_hvp)
5391 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5393 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5395 if self.primary_offline and self.op.ignore_offline_nodes:
5396 self.proc.LogWarning("Ignoring offline primary node")
5398 if self.op.hvparams or self.op.beparams:
5399 self.proc.LogWarning("Overridden parameters are ignored")
5401 _CheckNodeOnline(self, instance.primary_node)
5403 bep = self.cfg.GetClusterInfo().FillBE(instance)
5405 # check that the bridges exist
5406 _CheckInstanceBridgesExist(self, instance)
5408 remote_info = self.rpc.call_instance_info(instance.primary_node,
5410 instance.hypervisor)
5411 remote_info.Raise("Error checking node %s" % instance.primary_node,
5412 prereq=True, ecode=errors.ECODE_ENVIRON)
5413 if not remote_info.payload: # not running already
5414 _CheckNodeFreeMemory(self, instance.primary_node,
5415 "starting instance %s" % instance.name,
5416 bep[constants.BE_MEMORY], instance.hypervisor)
5418 def Exec(self, feedback_fn):
5419 """Start the instance.
5422 instance = self.instance
5423 force = self.op.force
5425 self.cfg.MarkInstanceUp(instance.name)
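# record the desired "running" state first; if the primary node is offline we
# stop here, otherwise the disks are assembled and the instance started below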
5427 if self.primary_offline:
5428 assert self.op.ignore_offline_nodes
5429 self.proc.LogInfo("Primary node offline, marked instance as started")
5431 node_current = instance.primary_node
5433 _StartInstanceDisks(self, instance, force)
5435 result = self.rpc.call_instance_start(node_current, instance,
5436 self.op.hvparams, self.op.beparams)
5437 msg = result.fail_msg
5439 _ShutdownInstanceDisks(self, instance)
5440 raise errors.OpExecError("Could not start instance: %s" % msg)
5443 class LUInstanceReboot(LogicalUnit):
5444 """Reboot an instance.
5447 HPATH = "instance-reboot"
5448 HTYPE = constants.HTYPE_INSTANCE
5451 def ExpandNames(self):
5452 self._ExpandAndLockInstance()
5454 def BuildHooksEnv(self):
5457 This runs on master, primary and secondary nodes of the instance.
5461 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5462 "REBOOT_TYPE": self.op.reboot_type,
5463 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5466 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5470 def BuildHooksNodes(self):
5471 """Build hooks nodes.
5474 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5477 def CheckPrereq(self):
5478 """Check prerequisites.
5480 This checks that the instance is in the cluster.
5483 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5484 assert self.instance is not None, \
5485 "Cannot retrieve locked instance %s" % self.op.instance_name
5487 _CheckNodeOnline(self, instance.primary_node)
5489 # check that the bridges exist
5490 _CheckInstanceBridgesExist(self, instance)
5492 def Exec(self, feedback_fn):
5493 """Reboot the instance.
5496 instance = self.instance
5497 ignore_secondaries = self.op.ignore_secondaries
5498 reboot_type = self.op.reboot_type
5500 remote_info = self.rpc.call_instance_info(instance.primary_node,
5502 instance.hypervisor)
5503 remote_info.Raise("Error checking node %s" % instance.primary_node)
5504 instance_running = bool(remote_info.payload)
5506 node_current = instance.primary_node
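# soft and hard reboots are delegated to the hypervisor on the primary node;
# a full reboot is emulated below by shutting the instance (and its disks)
# down and starting it again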
5508 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5509 constants.INSTANCE_REBOOT_HARD]:
5510 for disk in instance.disks:
5511 self.cfg.SetDiskID(disk, node_current)
5512 result = self.rpc.call_instance_reboot(node_current, instance,
5514 self.op.shutdown_timeout)
5515 result.Raise("Could not reboot instance")
5517 if instance_running:
5518 result = self.rpc.call_instance_shutdown(node_current, instance,
5519 self.op.shutdown_timeout)
5520 result.Raise("Could not shutdown instance for full reboot")
5521 _ShutdownInstanceDisks(self, instance)
5523 self.LogInfo("Instance %s was already stopped, starting now",
5525 _StartInstanceDisks(self, instance, ignore_secondaries)
5526 result = self.rpc.call_instance_start(node_current, instance, None, None)
5527 msg = result.fail_msg
5529 _ShutdownInstanceDisks(self, instance)
5530 raise errors.OpExecError("Could not start instance for"
5531 " full reboot: %s" % msg)
5533 self.cfg.MarkInstanceUp(instance.name)
5536 class LUInstanceShutdown(LogicalUnit):
5537 """Shutdown an instance.
5540 HPATH = "instance-stop"
5541 HTYPE = constants.HTYPE_INSTANCE
5544 def ExpandNames(self):
5545 self._ExpandAndLockInstance()
5547 def BuildHooksEnv(self):
5550 This runs on master, primary and secondary nodes of the instance.
5553 env = _BuildInstanceHookEnvByObject(self, self.instance)
5554 env["TIMEOUT"] = self.op.timeout
5557 def BuildHooksNodes(self):
5558 """Build hooks nodes.
5561 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5564 def CheckPrereq(self):
5565 """Check prerequisites.
5567 This checks that the instance is in the cluster.
5570 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5571 assert self.instance is not None, \
5572 "Cannot retrieve locked instance %s" % self.op.instance_name
5574 self.primary_offline = \
5575 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5577 if self.primary_offline and self.op.ignore_offline_nodes:
5578 self.proc.LogWarning("Ignoring offline primary node")
5580 _CheckNodeOnline(self, self.instance.primary_node)
5582 def Exec(self, feedback_fn):
5583 """Shutdown the instance.
5586 instance = self.instance
5587 node_current = instance.primary_node
5588 timeout = self.op.timeout
5590 self.cfg.MarkInstanceDown(instance.name)
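# record the desired "stopped" state first; the shutdown RPC and disk
# deactivation below are skipped if the primary node is offline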
5592 if self.primary_offline:
5593 assert self.op.ignore_offline_nodes
5594 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5596 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5597 msg = result.fail_msg
5599 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5601 _ShutdownInstanceDisks(self, instance)
5604 class LUInstanceReinstall(LogicalUnit):
5605 """Reinstall an instance.
5608 HPATH = "instance-reinstall"
5609 HTYPE = constants.HTYPE_INSTANCE
5612 def ExpandNames(self):
5613 self._ExpandAndLockInstance()
5615 def BuildHooksEnv(self):
5618 This runs on master, primary and secondary nodes of the instance.
5621 return _BuildInstanceHookEnvByObject(self, self.instance)
5623 def BuildHooksNodes(self):
5624 """Build hooks nodes.
5627 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5630 def CheckPrereq(self):
5631 """Check prerequisites.
5633 This checks that the instance is in the cluster and is not running.
5636 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5637 assert instance is not None, \
5638 "Cannot retrieve locked instance %s" % self.op.instance_name
5639 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5640 " offline, cannot reinstall")
5641 for node in instance.secondary_nodes:
5642 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5643 " cannot reinstall")
5645 if instance.disk_template == constants.DT_DISKLESS:
5646 raise errors.OpPrereqError("Instance '%s' has no disks" %
5647 self.op.instance_name,
5649 _CheckInstanceDown(self, instance, "cannot reinstall")
5651 if self.op.os_type is not None:
5653 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5654 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5655 instance_os = self.op.os_type
5657 instance_os = instance.os
5659 nodelist = list(instance.all_nodes)
5661 if self.op.osparams:
5662 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5663 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5664 self.os_inst = i_osdict # the new dict (without defaults)
5668 self.instance = instance
5670 def Exec(self, feedback_fn):
5671 """Reinstall the instance.
5674 inst = self.instance
5676 if self.op.os_type is not None:
5677 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5678 inst.os = self.op.os_type
5679 # Write to configuration
5680 self.cfg.Update(inst, feedback_fn)
5682 _StartInstanceDisks(self, inst, None)
5684 feedback_fn("Running the instance OS create scripts...")
5685 # FIXME: pass debug option from opcode to backend
5686 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5687 self.op.debug_level,
5688 osparams=self.os_inst)
5689 result.Raise("Could not install OS for instance %s on node %s" %
5690 (inst.name, inst.primary_node))
5692 _ShutdownInstanceDisks(self, inst)
5695 class LUInstanceRecreateDisks(LogicalUnit):
5696 """Recreate an instance's missing disks.
5699 HPATH = "instance-recreate-disks"
5700 HTYPE = constants.HTYPE_INSTANCE
5703 def ExpandNames(self):
5704 self._ExpandAndLockInstance()
5706 def BuildHooksEnv(self):
5709 This runs on master, primary and secondary nodes of the instance.
5712 return _BuildInstanceHookEnvByObject(self, self.instance)
5714 def BuildHooksNodes(self):
5715 """Build hooks nodes.
5718 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5721 def CheckPrereq(self):
5722 """Check prerequisites.
5724 This checks that the instance is in the cluster and is not running.
5727 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5728 assert instance is not None, \
5729 "Cannot retrieve locked instance %s" % self.op.instance_name
5730 _CheckNodeOnline(self, instance.primary_node)
5732 if instance.disk_template == constants.DT_DISKLESS:
5733 raise errors.OpPrereqError("Instance '%s' has no disks" %
5734 self.op.instance_name, errors.ECODE_INVAL)
5735 _CheckInstanceDown(self, instance, "cannot recreate disks")
5737 if not self.op.disks:
5738 self.op.disks = range(len(instance.disks))
5740 for idx in self.op.disks:
5741 if idx >= len(instance.disks):
5742 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
5743 errors.ECODE_INVAL)
5745 self.instance = instance
5747 def Exec(self, feedback_fn):
5748 """Recreate the disks.
5752 for idx, _ in enumerate(self.instance.disks):
5753 if idx not in self.op.disks: # disk idx has not been passed in
5757 _CreateDisks(self, self.instance, to_skip=to_skip)
5760 class LUInstanceRename(LogicalUnit):
5761 """Rename an instance.
5764 HPATH = "instance-rename"
5765 HTYPE = constants.HTYPE_INSTANCE
5767 def CheckArguments(self):
5771 if self.op.ip_check and not self.op.name_check:
5772 # TODO: make the ip check more flexible and not depend on the name check
5773 raise errors.OpPrereqError("IP address check requires a name check",
5774 errors.ECODE_INVAL)
5776 def BuildHooksEnv(self):
5779 This runs on master, primary and secondary nodes of the instance.
5782 env = _BuildInstanceHookEnvByObject(self, self.instance)
5783 env["INSTANCE_NEW_NAME"] = self.op.new_name
5786 def BuildHooksNodes(self):
5787 """Build hooks nodes.
5790 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5791 return (nl, nl)
5793 def CheckPrereq(self):
5794 """Check prerequisites.
5796 This checks that the instance is in the cluster and is not running.
5799 self.op.instance_name = _ExpandInstanceName(self.cfg,
5800 self.op.instance_name)
5801 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5802 assert instance is not None
5803 _CheckNodeOnline(self, instance.primary_node)
5804 _CheckInstanceDown(self, instance, "cannot rename")
5805 self.instance = instance
5807 new_name = self.op.new_name
5808 if self.op.name_check:
5809 hostname = netutils.GetHostname(name=new_name)
5810 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5811 hostname.name)
5812 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5813 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5814 " same as given hostname '%s'") %
5815 (hostname.name, self.op.new_name),
5816 errors.ECODE_INVAL)
5817 new_name = self.op.new_name = hostname.name
5818 if (self.op.ip_check and
5819 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5820 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5821 (hostname.ip, new_name),
5822 errors.ECODE_NOTUNIQUE)
5824 instance_list = self.cfg.GetInstanceList()
5825 if new_name in instance_list and new_name != instance.name:
5826 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5827 new_name, errors.ECODE_EXISTS)
5829 def Exec(self, feedback_fn):
5830 """Rename the instance.
5833 inst = self.instance
5834 old_name = inst.name
5836 rename_file_storage = False
5837 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5838 self.op.new_name != inst.name):
5839 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5840 rename_file_storage = True
5842 self.cfg.RenameInstance(inst.name, self.op.new_name)
5843 # Change the instance lock. This is definitely safe while we hold the BGL.
5844 # Otherwise the new lock would have to be added in acquired mode.
5846 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5847 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5849 # re-read the instance from the configuration after rename
5850 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5852 if rename_file_storage:
5853 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5854 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5855 old_file_storage_dir,
5856 new_file_storage_dir)
5857 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5858 " (but the instance has been renamed in Ganeti)" %
5859 (inst.primary_node, old_file_storage_dir,
5860 new_file_storage_dir))
5862 _StartInstanceDisks(self, inst, None)
5863 try:
5864 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5865 old_name, self.op.debug_level)
5866 msg = result.fail_msg
5867 if msg:
5868 msg = ("Could not run OS rename script for instance %s on node %s"
5869 " (but the instance has been renamed in Ganeti): %s" %
5870 (inst.name, inst.primary_node, msg))
5871 self.proc.LogWarning(msg)
5872 finally:
5873 _ShutdownInstanceDisks(self, inst)
5875 return inst.name
5878 class LUInstanceRemove(LogicalUnit):
5879 """Remove an instance.
5882 HPATH = "instance-remove"
5883 HTYPE = constants.HTYPE_INSTANCE
5886 def ExpandNames(self):
5887 self._ExpandAndLockInstance()
5888 self.needed_locks[locking.LEVEL_NODE] = []
5889 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5891 def DeclareLocks(self, level):
5892 if level == locking.LEVEL_NODE:
5893 self._LockInstancesNodes()
5895 def BuildHooksEnv(self):
5898 This runs on master, primary and secondary nodes of the instance.
5901 env = _BuildInstanceHookEnvByObject(self, self.instance)
5902 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5905 def BuildHooksNodes(self):
5906 """Build hooks nodes.
5909 nl = [self.cfg.GetMasterNode()]
5910 nl_post = list(self.instance.all_nodes) + nl
5911 return (nl, nl_post)
5913 def CheckPrereq(self):
5914 """Check prerequisites.
5916 This checks that the instance is in the cluster.
5919 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5920 assert self.instance is not None, \
5921 "Cannot retrieve locked instance %s" % self.op.instance_name
5923 def Exec(self, feedback_fn):
5924 """Remove the instance.
5927 instance = self.instance
5928 logging.info("Shutting down instance %s on node %s",
5929 instance.name, instance.primary_node)
5931 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5932 self.op.shutdown_timeout)
5933 msg = result.fail_msg
5934 if msg:
5935 if self.op.ignore_failures:
5936 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5937 else:
5938 raise errors.OpExecError("Could not shutdown instance %s on"
5939 " node %s: %s" %
5940 (instance.name, instance.primary_node, msg))
5942 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5945 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5946 """Utility function to remove an instance.
5949 logging.info("Removing block devices for instance %s", instance.name)
5951 if not _RemoveDisks(lu, instance):
5952 if not ignore_failures:
5953 raise errors.OpExecError("Can't remove instance's disks")
5954 feedback_fn("Warning: can't remove instance's disks")
5956 logging.info("Removing instance %s out of cluster config", instance.name)
5958 lu.cfg.RemoveInstance(instance.name)
5960 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5961 "Instance lock removal conflict"
5963 # Remove lock for the instance
5964 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5967 class LUInstanceQuery(NoHooksLU):
5968 """Logical unit for querying instances.
5971 # pylint: disable-msg=W0142
5974 def CheckArguments(self):
5975 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5976 self.op.output_fields, self.op.use_locking)
5978 def ExpandNames(self):
5979 self.iq.ExpandNames(self)
5981 def DeclareLocks(self, level):
5982 self.iq.DeclareLocks(self, level)
5984 def Exec(self, feedback_fn):
5985 return self.iq.OldStyleQuery(self)
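# For reference, qlang.MakeSimpleFilter("name", ["inst1", "inst2"]) builds a
# query-language filter of roughly this shape (a sketch; the exact helper
# output is defined in the qlang module):
#   ["|", ["=", "name", "inst1"], ["=", "name", "inst2"]]
# which _InstanceQuery then evaluates against the configuration and, when
# use_locking is set, against live data gathered from the nodes.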
5988 class LUInstanceFailover(LogicalUnit):
5989 """Failover an instance.
5992 HPATH = "instance-failover"
5993 HTYPE = constants.HTYPE_INSTANCE
5996 def CheckArguments(self):
5997 """Check the arguments.
6000 self.iallocator = getattr(self.op, "iallocator", None)
6001 self.target_node = getattr(self.op, "target_node", None)
6003 def ExpandNames(self):
6004 self._ExpandAndLockInstance()
6006 if self.op.target_node is not None:
6007 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6009 self.needed_locks[locking.LEVEL_NODE] = []
6010 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6012 ignore_consistency = self.op.ignore_consistency
6013 shutdown_timeout = self.op.shutdown_timeout
6014 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6015 cleanup=False,
6016 iallocator=self.op.iallocator,
6017 target_node=self.op.target_node,
6018 failover=True,
6019 ignore_consistency=ignore_consistency,
6020 shutdown_timeout=shutdown_timeout)
6021 self.tasklets = [self._migrater]
6023 def DeclareLocks(self, level):
6024 if level == locking.LEVEL_NODE:
6025 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6026 if instance.disk_template in constants.DTS_EXT_MIRROR:
6027 if self.op.target_node is None:
6028 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6029 else:
6030 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6031 self.op.target_node]
6032 del self.recalculate_locks[locking.LEVEL_NODE]
6033 else:
6034 self._LockInstancesNodes()
6036 def BuildHooksEnv(self):
6039 This runs on master, primary and secondary nodes of the instance.
6042 instance = self._migrater.instance
6043 source_node = instance.primary_node
6044 target_node = self._migrater.target_node
6045 env = {
6046 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6047 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6048 "OLD_PRIMARY": source_node,
6049 "NEW_PRIMARY": target_node,
6050 }
6052 if instance.disk_template in constants.DTS_INT_MIRROR:
6053 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6054 env["NEW_SECONDARY"] = source_node
6055 else:
6056 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6058 env.update(_BuildInstanceHookEnvByObject(self, instance))
6060 return env
6062 def BuildHooksNodes(self):
6063 """Build hooks nodes.
6066 instance = self._migrater.instance
6067 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6068 return (nl, nl + [instance.primary_node])
6071 class LUInstanceMigrate(LogicalUnit):
6072 """Migrate an instance.
6074 This is migration without shutting down, compared to the failover,
6075 which is done with shutdown.
6078 HPATH = "instance-migrate"
6079 HTYPE = constants.HTYPE_INSTANCE
6082 def ExpandNames(self):
6083 self._ExpandAndLockInstance()
6085 if self.op.target_node is not None:
6086 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6088 self.needed_locks[locking.LEVEL_NODE] = []
6089 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6091 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6092 cleanup=self.op.cleanup,
6093 failover=False,
6094 fallback=self.op.allow_failover)
6095 self.tasklets = [self._migrater]
6097 def DeclareLocks(self, level):
6098 if level == locking.LEVEL_NODE:
6099 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6100 if instance.disk_template in constants.DTS_EXT_MIRROR:
6101 if self.op.target_node is None:
6102 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6103 else:
6104 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6105 self.op.target_node]
6106 del self.recalculate_locks[locking.LEVEL_NODE]
6107 else:
6108 self._LockInstancesNodes()
6110 def BuildHooksEnv(self):
6113 This runs on master, primary and secondary nodes of the instance.
6116 instance = self._migrater.instance
6117 source_node = instance.primary_node
6118 target_node = self._migrater.target_node
6119 env = _BuildInstanceHookEnvByObject(self, instance)
6120 env.update({
6121 "MIGRATE_LIVE": self._migrater.live,
6122 "MIGRATE_CLEANUP": self.op.cleanup,
6123 "OLD_PRIMARY": source_node,
6124 "NEW_PRIMARY": target_node,
6125 })
6127 if instance.disk_template in constants.DTS_INT_MIRROR:
6128 env["OLD_SECONDARY"] = target_node
6129 env["NEW_SECONDARY"] = source_node
6130 else:
6131 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6133 return env
6135 def BuildHooksNodes(self):
6136 """Build hooks nodes.
6139 instance = self._migrater.instance
6140 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6141 return (nl, nl + [instance.primary_node])
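# As a rough sketch, for a live DRBD migration of "web1" from node1 to node2
# the hooks environment would carry something like MIGRATE_LIVE=True,
# MIGRATE_CLEANUP=False, OLD_PRIMARY=node1, NEW_PRIMARY=node2,
# OLD_SECONDARY=node2 and NEW_SECONDARY=node1, on top of the generic
# INSTANCE_* variables added by _BuildInstanceHookEnvByObject (hook scripts
# see these with the usual GANETI_ prefix); node names here are illustrative.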
6144 class LUInstanceMove(LogicalUnit):
6145 """Move an instance by data-copying.
6148 HPATH = "instance-move"
6149 HTYPE = constants.HTYPE_INSTANCE
6152 def ExpandNames(self):
6153 self._ExpandAndLockInstance()
6154 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6155 self.op.target_node = target_node
6156 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6157 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6159 def DeclareLocks(self, level):
6160 if level == locking.LEVEL_NODE:
6161 self._LockInstancesNodes(primary_only=True)
6163 def BuildHooksEnv(self):
6166 This runs on master, primary and secondary nodes of the instance.
6170 "TARGET_NODE": self.op.target_node,
6171 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6173 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6176 def BuildHooksNodes(self):
6177 """Build hooks nodes.
6180 nl = [
6181 self.cfg.GetMasterNode(),
6182 self.instance.primary_node,
6183 self.op.target_node,
6184 ]
6185 return (nl, nl)
6187 def CheckPrereq(self):
6188 """Check prerequisites.
6190 This checks that the instance is in the cluster.
6193 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6194 assert self.instance is not None, \
6195 "Cannot retrieve locked instance %s" % self.op.instance_name
6197 node = self.cfg.GetNodeInfo(self.op.target_node)
6198 assert node is not None, \
6199 "Cannot retrieve locked node %s" % self.op.target_node
6201 self.target_node = target_node = node.name
6203 if target_node == instance.primary_node:
6204 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6205 (instance.name, target_node),
6206 errors.ECODE_STATE)
6208 bep = self.cfg.GetClusterInfo().FillBE(instance)
6210 for idx, dsk in enumerate(instance.disks):
6211 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6212 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6213 " cannot copy" % idx, errors.ECODE_STATE)
6215 _CheckNodeOnline(self, target_node)
6216 _CheckNodeNotDrained(self, target_node)
6217 _CheckNodeVmCapable(self, target_node)
6219 if instance.admin_up:
6220 # check memory requirements on the secondary node
6221 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6222 instance.name, bep[constants.BE_MEMORY],
6223 instance.hypervisor)
6224 else:
6225 self.LogInfo("Not checking memory on the secondary node as"
6226 " instance will not be started")
6228 # check bridge existance
6229 _CheckInstanceBridgesExist(self, instance, node=target_node)
6231 def Exec(self, feedback_fn):
6232 """Move an instance.
6234 The move is done by shutting it down on its present node, copying
6235 the data over (slow) and starting it on the new node.
6238 instance = self.instance
6240 source_node = instance.primary_node
6241 target_node = self.target_node
6243 self.LogInfo("Shutting down instance %s on source node %s",
6244 instance.name, source_node)
6246 result = self.rpc.call_instance_shutdown(source_node, instance,
6247 self.op.shutdown_timeout)
6248 msg = result.fail_msg
6249 if msg:
6250 if self.op.ignore_consistency:
6251 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6252 " Proceeding anyway. Please make sure node"
6253 " %s is down. Error details: %s",
6254 instance.name, source_node, source_node, msg)
6255 else:
6256 raise errors.OpExecError("Could not shutdown instance %s on"
6257 " node %s: %s" %
6258 (instance.name, source_node, msg))
6260 # create the target disks
6261 try:
6262 _CreateDisks(self, instance, target_node=target_node)
6263 except errors.OpExecError:
6264 self.LogWarning("Device creation failed, reverting...")
6265 try:
6266 _RemoveDisks(self, instance, target_node=target_node)
6267 finally:
6268 self.cfg.ReleaseDRBDMinors(instance.name)
6269 raise
6271 cluster_name = self.cfg.GetClusterInfo().cluster_name
6273 errs = []
6274 # activate, get path, copy the data over
6275 for idx, disk in enumerate(instance.disks):
6276 self.LogInfo("Copying data for disk %d", idx)
6277 result = self.rpc.call_blockdev_assemble(target_node, disk,
6278 instance.name, True, idx)
6279 if result.fail_msg:
6280 self.LogWarning("Can't assemble newly created disk %d: %s",
6281 idx, result.fail_msg)
6282 errs.append(result.fail_msg)
6283 continue
6284 dev_path = result.payload
6285 result = self.rpc.call_blockdev_export(source_node, disk,
6286 target_node, dev_path,
6287 cluster_name)
6288 if result.fail_msg:
6289 self.LogWarning("Can't copy data over for disk %d: %s",
6290 idx, result.fail_msg)
6291 errs.append(result.fail_msg)
6292 break
6295 self.LogWarning("Some disks failed to copy, aborting")
6297 _RemoveDisks(self, instance, target_node=target_node)
6299 self.cfg.ReleaseDRBDMinors(instance.name)
6300 raise errors.OpExecError("Errors during disk copy: %s" %
6303 instance.primary_node = target_node
6304 self.cfg.Update(instance, feedback_fn)
6306 self.LogInfo("Removing the disks on the original node")
6307 _RemoveDisks(self, instance, target_node=source_node)
6309 # Only start the instance if it's marked as up
6310 if instance.admin_up:
6311 self.LogInfo("Starting instance %s on node %s",
6312 instance.name, target_node)
6314 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6315 ignore_secondaries=True)
6316 if not disks_ok:
6317 _ShutdownInstanceDisks(self, instance)
6318 raise errors.OpExecError("Can't activate the instance's disks")
6320 result = self.rpc.call_instance_start(target_node, instance, None, None)
6321 msg = result.fail_msg
6322 if msg:
6323 _ShutdownInstanceDisks(self, instance)
6324 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6325 (instance.name, target_node, msg))
6328 class LUNodeMigrate(LogicalUnit):
6329 """Migrate all instances from a node.
6332 HPATH = "node-migrate"
6333 HTYPE = constants.HTYPE_NODE
6336 def CheckArguments(self):
6337 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6339 def ExpandNames(self):
6340 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6342 self.needed_locks = {}
6344 # Create tasklets for migrating instances for all instances on this node
6345 names = []
6346 tasklets = []
6348 self.lock_all_nodes = False
6350 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6351 logging.debug("Migrating instance %s", inst.name)
6352 names.append(inst.name)
6354 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
6356 if inst.disk_template in constants.DTS_EXT_MIRROR:
6357 # We need to lock all nodes, as the iallocator will choose the
6358 # destination nodes afterwards
6359 self.lock_all_nodes = True
6361 self.tasklets = tasklets
6363 # Declare node locks
6364 if self.lock_all_nodes:
6365 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6366 else:
6367 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6368 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6370 # Declare instance locks
6371 self.needed_locks[locking.LEVEL_INSTANCE] = names
6373 def DeclareLocks(self, level):
6374 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6375 self._LockInstancesNodes()
6377 def BuildHooksEnv(self):
6380 This runs on the master, the primary and all the secondaries.
6384 "NODE_NAME": self.op.node_name,
6387 def BuildHooksNodes(self):
6388 """Build hooks nodes.
6391 nl = [self.cfg.GetMasterNode()]
6392 return (nl, nl)
6395 class TLMigrateInstance(Tasklet):
6396 """Tasklet class for instance migration.
6398 @type live: boolean
6399 @ivar live: whether the migration will be done live or non-live;
6400 this variable is initialized only after CheckPrereq has run
6401 @type cleanup: boolean
6402 @ivar cleanup: Whether we are cleaning up from a failed migration
6403 @type iallocator: string
6404 @ivar iallocator: The iallocator used to determine target_node
6405 @type target_node: string
6406 @ivar target_node: If given, the target_node to reallocate the instance to
6407 @type failover: boolean
6408 @ivar failover: Whether operation results in failover or migration
6409 @type fallback: boolean
6410 @ivar fallback: Whether fallback to failover is allowed if migration not
6411 possible
6412 @type ignore_consistency: boolean
6413 @ivar ignore_consistency: Whether we should ignore consistency between source
6414 and target node
6415 @type shutdown_timeout: int
6416 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
6419 def __init__(self, lu, instance_name, cleanup=False,
6420 failover=False, fallback=False,
6421 ignore_consistency=False,
6422 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6423 """Initializes this class.
6426 Tasklet.__init__(self, lu)
6429 self.instance_name = instance_name
6430 self.cleanup = cleanup
6431 self.live = False # will be overridden later
6432 self.failover = failover
6433 self.fallback = fallback
6434 self.ignore_consistency = ignore_consistency
6435 self.shutdown_timeout = shutdown_timeout
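# Typical wiring (sketch): a logical unit creates one of these tasklets in
# ExpandNames and lets the generic tasklet machinery drive it, e.g.
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      cleanup=False, failover=True,
#                                      ignore_consistency=False)
#   self.tasklets = [self._migrater]
# which is the pattern used by LUInstanceFailover and LUInstanceMigrate above.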
6437 def CheckPrereq(self):
6438 """Check prerequisites.
6440 This checks that the instance is in the cluster.
6443 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6444 instance = self.cfg.GetInstanceInfo(instance_name)
6445 assert instance is not None
6446 self.instance = instance
6448 if (not self.cleanup and not instance.admin_up and not self.failover and
6449 self.fallback):
6450 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6451 " to failover")
6452 self.failover = True
6454 if instance.disk_template not in constants.DTS_MIRRORED:
6455 if self.failover:
6456 text = "failovers"
6457 else:
6458 text = "migrations"
6459 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6460 " %s" % (instance.disk_template, text),
6461 errors.ECODE_STATE)
6463 if instance.disk_template in constants.DTS_EXT_MIRROR:
6464 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6466 if self.lu.op.iallocator:
6467 self._RunAllocator()
6468 else:
6469 # We set self.target_node as it is required by
6470 # BuildHooksEnv
6471 self.target_node = self.lu.op.target_node
6473 # self.target_node is already populated, either directly or by the
6474 # iallocator run
6475 target_node = self.target_node
6477 if len(self.lu.tasklets) == 1:
6478 # It is safe to release locks only when we're the only tasklet in the LU
6479 _ReleaseLocks(self, locking.LEVEL_NODE,
6480 keep=[instance.primary_node, self.target_node])
6482 else:
6483 secondary_nodes = instance.secondary_nodes
6484 if not secondary_nodes:
6485 raise errors.ConfigurationError("No secondary node but using"
6486 " %s disk template" %
6487 instance.disk_template)
6488 target_node = secondary_nodes[0]
6489 if self.lu.op.iallocator or (self.target_node and
6490 self.target_node != target_node):
6491 if self.failover:
6492 text = "failed over"
6493 else:
6494 text = "migrated"
6495 raise errors.OpPrereqError("Instances with disk template %s cannot"
6496 " be %s to arbitrary nodes"
6497 " (neither an iallocator nor a target"
6498 " node can be passed)" %
6499 (instance.disk_template, text),
6500 errors.ECODE_INVAL)
6502 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6504 # check memory requirements on the secondary node
6505 if not self.failover or instance.admin_up:
6506 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6507 instance.name, i_be[constants.BE_MEMORY],
6508 instance.hypervisor)
6509 else:
6510 self.lu.LogInfo("Not checking memory on the secondary node as"
6511 " instance will not be started")
6513 # check bridge existance
6514 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6516 if not self.cleanup:
6517 _CheckNodeNotDrained(self.lu, target_node)
6518 if not self.failover:
6519 result = self.rpc.call_instance_migratable(instance.primary_node,
6520 instance)
6521 if result.fail_msg and self.fallback:
6522 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6523 " failover")
6524 self.failover = True
6525 else:
6526 result.Raise("Can't migrate, please use failover",
6527 prereq=True, ecode=errors.ECODE_STATE)
6529 assert not (self.failover and self.cleanup)
6531 if not self.failover:
6532 if self.lu.op.live is not None and self.lu.op.mode is not None:
6533 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6534 " parameters are accepted",
6536 if self.lu.op.live is not None:
6538 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6540 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6541 # reset the 'live' parameter to None so that repeated
6542 # invocations of CheckPrereq do not raise an exception
6543 self.lu.op.live = None
6544 elif self.lu.op.mode is None:
6545 # read the default value from the hypervisor
6546 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6547 skip_globals=False)
6548 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6550 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6551 else:
6552 # Failover is never live
6553 self.live = False
6555 def _RunAllocator(self):
6556 """Run the allocator based on input opcode.
6559 ial = IAllocator(self.cfg, self.rpc,
6560 mode=constants.IALLOCATOR_MODE_RELOC,
6561 name=self.instance_name,
6562 # TODO See why hail breaks with a single node below
6563 relocate_from=[self.instance.primary_node,
6564 self.instance.primary_node],
6565 )
6567 ial.Run(self.lu.op.iallocator)
6569 if not ial.success:
6570 raise errors.OpPrereqError("Can't compute nodes using"
6571 " iallocator '%s': %s" %
6572 (self.lu.op.iallocator, ial.info),
6573 errors.ECODE_NORES)
6574 if len(ial.result) != ial.required_nodes:
6575 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6576 " of nodes (%s), required %s" %
6577 (self.lu.op.iallocator, len(ial.result),
6578 ial.required_nodes), errors.ECODE_FAULT)
6579 self.target_node = ial.result[0]
6580 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6581 self.instance_name, self.lu.op.iallocator,
6582 utils.CommaJoin(ial.result))
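# For reference, a relocation request of this shape asks the external
# allocator script (for example "hail") for one replacement node, and the
# result is a list with a single node name, e.g. ["node3.example.com"]
# (the name here is illustrative only).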
6584 def _WaitUntilSync(self):
6585 """Poll with custom rpc for disk sync.
6587 This uses our own step-based rpc call.
6590 self.feedback_fn("* wait until resync is done")
6594 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6596 self.instance.disks)
6598 for node, nres in result.items():
6599 nres.Raise("Cannot resync disks on node %s" % node)
6600 node_done, node_percent = nres.payload
6601 all_done = all_done and node_done
6602 if node_percent is not None:
6603 min_percent = min(min_percent, node_percent)
6605 if min_percent < 100:
6606 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6609 def _EnsureSecondary(self, node):
6610 """Demote a node to secondary.
6613 self.feedback_fn("* switching node %s to secondary mode" % node)
6615 for dev in self.instance.disks:
6616 self.cfg.SetDiskID(dev, node)
6618 result = self.rpc.call_blockdev_close(node, self.instance.name,
6619 self.instance.disks)
6620 result.Raise("Cannot change disk to secondary on node %s" % node)
6622 def _GoStandalone(self):
6623 """Disconnect from the network.
6626 self.feedback_fn("* changing into standalone mode")
6627 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6628 self.instance.disks)
6629 for node, nres in result.items():
6630 nres.Raise("Cannot disconnect disks node %s" % node)
6632 def _GoReconnect(self, multimaster):
6633 """Reconnect to the network.
6639 msg = "single-master"
6640 self.feedback_fn("* changing disks into %s mode" % msg)
6641 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6642 self.instance.disks,
6643 self.instance.name, multimaster)
6644 for node, nres in result.items():
6645 nres.Raise("Cannot change disks config on node %s" % node)
6647 def _ExecCleanup(self):
6648 """Try to cleanup after a failed migration.
6650 The cleanup is done by:
6651 - check that the instance is running only on one node
6652 (and update the config if needed)
6653 - change disks on its secondary node to secondary
6654 - wait until disks are fully synchronized
6655 - disconnect from the network
6656 - change disks into single-master mode
6657 - wait again until disks are fully synchronized
6660 instance = self.instance
6661 target_node = self.target_node
6662 source_node = self.source_node
6664 # check running on only one node
6665 self.feedback_fn("* checking where the instance actually runs"
6666 " (if this hangs, the hypervisor might be in"
6668 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6669 for node, result in ins_l.items():
6670 result.Raise("Can't contact node %s" % node)
6672 runningon_source = instance.name in ins_l[source_node].payload
6673 runningon_target = instance.name in ins_l[target_node].payload
6675 if runningon_source and runningon_target:
6676 raise errors.OpExecError("Instance seems to be running on two nodes,"
6677 " or the hypervisor is confused; you will have"
6678 " to ensure manually that it runs only on one"
6679 " and restart this operation")
6681 if not (runningon_source or runningon_target):
6682 raise errors.OpExecError("Instance does not seem to be running at all;"
6683 " in this case it's safer to repair by"
6684 " running 'gnt-instance stop' to ensure disk"
6685 " shutdown, and then restarting it")
6687 if runningon_target:
6688 # the migration has actually succeeded, we need to update the config
6689 self.feedback_fn("* instance running on secondary node (%s),"
6690 " updating config" % target_node)
6691 instance.primary_node = target_node
6692 self.cfg.Update(instance, self.feedback_fn)
6693 demoted_node = source_node
6695 self.feedback_fn("* instance confirmed to be running on its"
6696 " primary node (%s)" % source_node)
6697 demoted_node = target_node
6699 if instance.disk_template in constants.DTS_INT_MIRROR:
6700 self._EnsureSecondary(demoted_node)
6701 try:
6702 self._WaitUntilSync()
6703 except errors.OpExecError:
6704 # we ignore here errors, since if the device is standalone, it
6705 # won't be able to sync
6706 pass
6707 self._GoStandalone()
6708 self._GoReconnect(False)
6709 self._WaitUntilSync()
6711 self.feedback_fn("* done")
6713 def _RevertDiskStatus(self):
6714 """Try to revert the disk status after a failed migration.
6717 target_node = self.target_node
6718 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6719 return
6721 try:
6722 self._EnsureSecondary(target_node)
6723 self._GoStandalone()
6724 self._GoReconnect(False)
6725 self._WaitUntilSync()
6726 except errors.OpExecError, err:
6727 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
6728 " please try to recover the instance manually;"
6729 " error '%s'" % str(err))
6731 def _AbortMigration(self):
6732 """Call the hypervisor code to abort a started migration.
6735 instance = self.instance
6736 target_node = self.target_node
6737 migration_info = self.migration_info
6739 abort_result = self.rpc.call_finalize_migration(target_node,
6740 instance,
6741 migration_info,
6742 False)
6743 abort_msg = abort_result.fail_msg
6744 if abort_msg:
6745 logging.error("Aborting migration failed on target node %s: %s",
6746 target_node, abort_msg)
6747 # Don't raise an exception here, as we stil have to try to revert the
6748 # disk status, even if this step failed.
6750 def _ExecMigration(self):
6751 """Migrate an instance.
6753 The migrate is done by:
6754 - change the disks into dual-master mode
6755 - wait until disks are fully synchronized again
6756 - migrate the instance
6757 - change disks on the new secondary node (the old primary) to secondary
6758 - wait until disks are fully synchronized
6759 - change disks into single-master mode
6762 instance = self.instance
6763 target_node = self.target_node
6764 source_node = self.source_node
6766 self.feedback_fn("* checking disk consistency between source and target")
6767 for dev in instance.disks:
6768 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6769 raise errors.OpExecError("Disk %s is degraded or not fully"
6770 " synchronized on target node,"
6771 " aborting migration" % dev.iv_name)
6773 # First get the migration information from the remote node
6774 result = self.rpc.call_migration_info(source_node, instance)
6775 msg = result.fail_msg
6776 if msg:
6777 log_err = ("Failed fetching source migration information from %s: %s" %
6778 (source_node, msg))
6779 logging.error(log_err)
6780 raise errors.OpExecError(log_err)
6782 self.migration_info = migration_info = result.payload
6784 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6785 # Then switch the disks to master/master mode
6786 self._EnsureSecondary(target_node)
6787 self._GoStandalone()
6788 self._GoReconnect(True)
6789 self._WaitUntilSync()
6791 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6792 result = self.rpc.call_accept_instance(target_node,
6793 instance,
6794 migration_info,
6795 self.nodes_ip[target_node])
6797 msg = result.fail_msg
6798 if msg:
6799 logging.error("Instance pre-migration failed, trying to revert"
6800 " disk status: %s", msg)
6801 self.feedback_fn("Pre-migration failed, aborting")
6802 self._AbortMigration()
6803 self._RevertDiskStatus()
6804 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6805 (instance.name, msg))
6807 self.feedback_fn("* migrating instance to %s" % target_node)
6808 result = self.rpc.call_instance_migrate(source_node, instance,
6809 self.nodes_ip[target_node],
6810 self.live)
6811 msg = result.fail_msg
6812 if msg:
6813 logging.error("Instance migration failed, trying to revert"
6814 " disk status: %s", msg)
6815 self.feedback_fn("Migration failed, aborting")
6816 self._AbortMigration()
6817 self._RevertDiskStatus()
6818 raise errors.OpExecError("Could not migrate instance %s: %s" %
6819 (instance.name, msg))
6821 instance.primary_node = target_node
6822 # distribute new instance config to the other nodes
6823 self.cfg.Update(instance, self.feedback_fn)
6825 result = self.rpc.call_finalize_migration(target_node,
6826 instance,
6827 migration_info,
6828 True)
6829 msg = result.fail_msg
6830 if msg:
6831 logging.error("Instance migration succeeded, but finalization failed:"
6832 " %s", msg)
6833 raise errors.OpExecError("Could not finalize instance migration: %s" %
6834 msg)
6836 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6837 self._EnsureSecondary(source_node)
6838 self._WaitUntilSync()
6839 self._GoStandalone()
6840 self._GoReconnect(False)
6841 self._WaitUntilSync()
6843 self.feedback_fn("* done")
6845 def _ExecFailover(self):
6846 """Failover an instance.
6848 The failover is done by shutting it down on its present node and
6849 starting it on the secondary.
6852 instance = self.instance
6853 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6855 source_node = instance.primary_node
6856 target_node = self.target_node
6858 if instance.admin_up:
6859 self.feedback_fn("* checking disk consistency between source and target")
6860 for dev in instance.disks:
6861 # for drbd, these are drbd over lvm
6862 if not _CheckDiskConsistency(self, dev, target_node, False):
6863 if not self.ignore_consistency:
6864 raise errors.OpExecError("Disk %s is degraded on target node,"
6865 " aborting failover" % dev.iv_name)
6867 self.feedback_fn("* not checking disk consistency as instance is not"
6870 self.feedback_fn("* shutting down instance on source node")
6871 logging.info("Shutting down instance %s on node %s",
6872 instance.name, source_node)
6874 result = self.rpc.call_instance_shutdown(source_node, instance,
6875 self.shutdown_timeout)
6876 msg = result.fail_msg
6877 if msg:
6878 if self.ignore_consistency or primary_node.offline:
6879 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
6880 " proceeding anyway; please make sure node"
6881 " %s is down; error details: %s",
6882 instance.name, source_node, source_node, msg)
6883 else:
6884 raise errors.OpExecError("Could not shutdown instance %s on"
6885 " node %s: %s" %
6886 (instance.name, source_node, msg))
6888 self.feedback_fn("* deactivating the instance's disks on source node")
6889 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6890 raise errors.OpExecError("Can't shut down the instance's disks.")
6892 instance.primary_node = target_node
6893 # distribute new instance config to the other nodes
6894 self.cfg.Update(instance, self.feedback_fn)
6896 # Only start the instance if it's marked as up
6897 if instance.admin_up:
6898 self.feedback_fn("* activating the instance's disks on target node")
6899 logging.info("Starting instance %s on node %s",
6900 instance.name, target_node)
6902 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6903 ignore_secondaries=True)
6904 if not disks_ok:
6905 _ShutdownInstanceDisks(self, instance)
6906 raise errors.OpExecError("Can't activate the instance's disks")
6908 self.feedback_fn("* starting the instance on the target node")
6909 result = self.rpc.call_instance_start(target_node, instance, None, None)
6910 msg = result.fail_msg
6911 if msg:
6912 _ShutdownInstanceDisks(self, instance)
6913 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6914 (instance.name, target_node, msg))
6916 def Exec(self, feedback_fn):
6917 """Perform the migration.
6920 self.feedback_fn = feedback_fn
6921 self.source_node = self.instance.primary_node
6923 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6924 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6925 self.target_node = self.instance.secondary_nodes[0]
6926 # Otherwise self.target_node has been populated either
6927 # directly, or through an iallocator.
6929 self.all_nodes = [self.source_node, self.target_node]
6930 self.nodes_ip = {
6931 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6932 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6933 }
6935 if self.failover:
6936 feedback_fn("Failover instance %s" % self.instance.name)
6937 self._ExecFailover()
6938 else:
6939 feedback_fn("Migrating instance %s" % self.instance.name)
6941 if self.cleanup:
6942 return self._ExecCleanup()
6943 else:
6944 return self._ExecMigration()
6947 def _CreateBlockDev(lu, node, instance, device, force_create,
6949 """Create a tree of block devices on a given node.
6951 If this device type has to be created on secondaries, create it and
6954 If not, just recurse to children keeping the same 'force' value.
6956 @param lu: the lu on whose behalf we execute
6957 @param node: the node on which to create the device
6958 @type instance: L{objects.Instance}
6959 @param instance: the instance which owns the device
6960 @type device: L{objects.Disk}
6961 @param device: the device to create
6962 @type force_create: boolean
6963 @param force_create: whether to force creation of this device; this
6964 will be change to True whenever we find a device which has
6965 CreateOnSecondary() attribute
6966 @param info: the extra 'metadata' we should attach to the device
6967 (this will be represented as a LVM tag)
6968 @type force_open: boolean
6969 @param force_open: this parameter will be passes to the
6970 L{backend.BlockdevCreate} function where it specifies
6971 whether we run on primary or not, and it affects both
6972 the child assembly and the device own Open() execution
6975 if device.CreateOnSecondary():
6976 force_create = True
6978 if device.children:
6979 for child in device.children:
6980 _CreateBlockDev(lu, node, instance, child, force_create,
6981 info, force_open)
6983 if not force_create:
6984 return
6986 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
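# As a sketch of the recursion: for a DRBD8 disk (as built by
# _GenerateDRBD8Branch below) the device has two LV children, so on the
# secondary node the call first creates both LVs and then, because DRBD
# devices are created on secondaries, the DRBD device itself; on nodes where
# nothing in the tree requires creation the function simply returns.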
6989 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6990 """Create a single block device on a given node.
6992 This will not recurse over children of the device, so they must be
6995 @param lu: the lu on whose behalf we execute
6996 @param node: the node on which to create the device
6997 @type instance: L{objects.Instance}
6998 @param instance: the instance which owns the device
6999 @type device: L{objects.Disk}
7000 @param device: the device to create
7001 @param info: the extra 'metadata' we should attach to the device
7002 (this will be represented as a LVM tag)
7003 @type force_open: boolean
7004 @param force_open: this parameter will be passes to the
7005 L{backend.BlockdevCreate} function where it specifies
7006 whether we run on primary or not, and it affects both
7007 the child assembly and the device own Open() execution
7010 lu.cfg.SetDiskID(device, node)
7011 result = lu.rpc.call_blockdev_create(node, device, device.size,
7012 instance.name, force_open, info)
7013 result.Raise("Can't create block device %s on"
7014 " node %s for instance %s" % (device, node, instance.name))
7015 if device.physical_id is None:
7016 device.physical_id = result.payload
7019 def _GenerateUniqueNames(lu, exts):
7020 """Generate a suitable LV name.
7022 This will generate a logical volume name for the given instance.
7025 results = []
7026 for val in exts:
7027 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7028 results.append("%s%s" % (new_id, val))
7029 return results
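# For instance, _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
# would yield something like ["<uuid>.disk0_data", "<uuid>.disk0_meta"],
# where <uuid> stands for the unique ID generated from the configuration
# (shown here only schematically).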
7032 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7033 iv_name, p_minor, s_minor):
7034 """Generate a drbd8 device complete with its children.
7037 assert len(vgnames) == len(names) == 2
7038 port = lu.cfg.AllocatePort()
7039 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7040 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7041 logical_id=(vgnames[0], names[0]))
7042 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7043 logical_id=(vgnames[1], names[1]))
7044 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7045 logical_id=(primary, secondary, port,
7046 p_minor, s_minor,
7047 shared_secret),
7048 children=[dev_data, dev_meta],
7049 iv_name=iv_name)
7050 return drbd_dev
7053 def _GenerateDiskTemplate(lu, template_name,
7054 instance_name, primary_node,
7055 secondary_nodes, disk_info,
7056 file_storage_dir, file_driver,
7057 base_index, feedback_fn):
7058 """Generate the entire disk layout for a given template type.
7061 #TODO: compute space requirements
7063 vgname = lu.cfg.GetVGName()
7064 disk_count = len(disk_info)
7065 disks = []
7066 if template_name == constants.DT_DISKLESS:
7067 pass
7068 elif template_name == constants.DT_PLAIN:
7069 if len(secondary_nodes) != 0:
7070 raise errors.ProgrammerError("Wrong template configuration")
7072 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7073 for i in range(disk_count)])
7074 for idx, disk in enumerate(disk_info):
7075 disk_index = idx + base_index
7076 vg = disk.get(constants.IDISK_VG, vgname)
7077 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7078 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7079 size=disk[constants.IDISK_SIZE],
7080 logical_id=(vg, names[idx]),
7081 iv_name="disk/%d" % disk_index,
7082 mode=disk[constants.IDISK_MODE])
7083 disks.append(disk_dev)
7084 elif template_name == constants.DT_DRBD8:
7085 if len(secondary_nodes) != 1:
7086 raise errors.ProgrammerError("Wrong template configuration")
7087 remote_node = secondary_nodes[0]
7088 minors = lu.cfg.AllocateDRBDMinor(
7089 [primary_node, remote_node] * len(disk_info), instance_name)
7091 names = []
7092 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7093 for i in range(disk_count)]):
7094 names.append(lv_prefix + "_data")
7095 names.append(lv_prefix + "_meta")
7096 for idx, disk in enumerate(disk_info):
7097 disk_index = idx + base_index
7098 data_vg = disk.get(constants.IDISK_VG, vgname)
7099 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7100 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7101 disk[constants.IDISK_SIZE],
7102 [data_vg, meta_vg],
7103 names[idx * 2:idx * 2 + 2],
7104 "disk/%d" % disk_index,
7105 minors[idx * 2], minors[idx * 2 + 1])
7106 disk_dev.mode = disk[constants.IDISK_MODE]
7107 disks.append(disk_dev)
7108 elif template_name == constants.DT_FILE:
7109 if len(secondary_nodes) != 0:
7110 raise errors.ProgrammerError("Wrong template configuration")
7112 opcodes.RequireFileStorage()
7114 for idx, disk in enumerate(disk_info):
7115 disk_index = idx + base_index
7116 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7117 size=disk[constants.IDISK_SIZE],
7118 iv_name="disk/%d" % disk_index,
7119 logical_id=(file_driver,
7120 "%s/disk%d" % (file_storage_dir,
7122 mode=disk[constants.IDISK_MODE])
7123 disks.append(disk_dev)
7124 elif template_name == constants.DT_SHARED_FILE:
7125 if len(secondary_nodes) != 0:
7126 raise errors.ProgrammerError("Wrong template configuration")
7128 opcodes.RequireSharedFileStorage()
7130 for idx, disk in enumerate(disk_info):
7131 disk_index = idx + base_index
7132 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7133 size=disk[constants.IDISK_SIZE],
7134 iv_name="disk/%d" % disk_index,
7135 logical_id=(file_driver,
7136 "%s/disk%d" % (file_storage_dir,
7138 mode=disk[constants.IDISK_MODE])
7139 disks.append(disk_dev)
7140 elif template_name == constants.DT_BLOCK:
7141 if len(secondary_nodes) != 0:
7142 raise errors.ProgrammerError("Wrong template configuration")
7144 for idx, disk in enumerate(disk_info):
7145 disk_index = idx + base_index
7146 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7147 size=disk[constants.IDISK_SIZE],
7148 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7149 disk[constants.IDISK_ADOPT]),
7150 iv_name="disk/%d" % disk_index,
7151 mode=disk[constants.IDISK_MODE])
7152 disks.append(disk_dev)
7154 else:
7155 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7157 return disks
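# The disk_info entries are dictionaries keyed by the IDISK_* constants; a
# plain-LVM request for a single 10 GiB read-write disk would look roughly
# like (sizes in MiB, volume group name illustrative):
#   disk_info = [{constants.IDISK_SIZE: 10240,
#                 constants.IDISK_MODE: constants.DISK_RDWR,
#                 constants.IDISK_VG: "xenvg"}]
# and _GenerateDiskTemplate would return one LD_LV objects.Disk with
# iv_name "disk/0" placed in volume group "xenvg".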
7159 def _GetInstanceInfoText(instance):
7160 """Compute that text that should be added to the disk's metadata.
7163 return "originstname+%s" % instance.name
7166 def _CalcEta(time_taken, written, total_size):
7167 """Calculates the ETA based on size written and total size.
7169 @param time_taken: The time taken so far
7170 @param written: amount written so far
7171 @param total_size: The total size of data to be written
7172 @return: The remaining time in seconds
7175 avg_time = time_taken / float(written)
7176 return (total_size - written) * avg_time
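# A quick numeric illustration: after 30 seconds with 256 MiB of a 1024 MiB
# disk written, the average time per MiB is 30 / 256 s, so
# _CalcEta(30.0, 256, 1024) == (1024 - 256) * (30.0 / 256) == 90.0 seconds.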
7179 def _WipeDisks(lu, instance):
7180 """Wipes instance disks.
7182 @type lu: L{LogicalUnit}
7183 @param lu: the logical unit on whose behalf we execute
7184 @type instance: L{objects.Instance}
7185 @param instance: the instance whose disks we should create
7186 @return: the success of the wipe
7189 node = instance.primary_node
7191 for device in instance.disks:
7192 lu.cfg.SetDiskID(device, node)
7194 logging.info("Pause sync of instance %s disks", instance.name)
7195 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7197 for idx, success in enumerate(result.payload):
7198 if not success:
7199 logging.warn("pause-sync of instance %s for disks %d failed",
7200 instance.name, idx)
7202 try:
7203 for idx, device in enumerate(instance.disks):
7204 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7205 # MAX_WIPE_CHUNK at max
7206 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7207 constants.MIN_WIPE_CHUNK_PERCENT)
7208 # we _must_ make this an int, otherwise rounding errors will
7210 wipe_chunk_size = int(wipe_chunk_size)
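# Illustrative arithmetic (assuming the defaults of 10 % and 1024 MiB for
# MIN_WIPE_CHUNK_PERCENT and MAX_WIPE_CHUNK): a 500000 MiB disk would
# nominally use a 50000 MiB chunk, so the cap brings it down to 1024 MiB,
# while a 5000 MiB disk simply uses 500 MiB chunks.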
7212 lu.LogInfo("* Wiping disk %d", idx)
7213 logging.info("Wiping disk %d for instance %s, node %s using"
7214 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7216 offset = 0
7217 size = device.size
7218 last_output = 0
7219 start_time = time.time()
7221 while offset < size:
7222 wipe_size = min(wipe_chunk_size, size - offset)
7223 logging.debug("Wiping disk %d, offset %s, chunk %s",
7224 idx, offset, wipe_size)
7225 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7226 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7227 (idx, offset, wipe_size))
7228 now = time.time()
7229 offset += wipe_size
7230 if now - last_output >= 60:
7231 eta = _CalcEta(now - start_time, offset, size)
7232 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7233 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7234 last_output = now
7236 logging.info("Resume sync of instance %s disks", instance.name)
7238 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7240 for idx, success in enumerate(result.payload):
7242 lu.LogWarning("Resume sync of disk %d failed, please have a"
7243 " look at the status and troubleshoot the issue", idx)
7244 logging.warn("resume-sync of instance %s for disks %d failed",
7248 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7249 """Create all disks for an instance.
7251 This abstracts away some work from AddInstance.
7253 @type lu: L{LogicalUnit}
7254 @param lu: the logical unit on whose behalf we execute
7255 @type instance: L{objects.Instance}
7256 @param instance: the instance whose disks we should create
7258 @param to_skip: list of indices to skip
7259 @type target_node: string
7260 @param target_node: if passed, overrides the target node for creation
7262 @return: the success of the creation
7265 info = _GetInstanceInfoText(instance)
7266 if target_node is None:
7267 pnode = instance.primary_node
7268 all_nodes = instance.all_nodes
7269 else:
7270 pnode = target_node
7271 all_nodes = [pnode]
7273 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7274 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7275 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7277 result.Raise("Failed to create directory '%s' on"
7278 " node %s" % (file_storage_dir, pnode))
7280 # Note: this needs to be kept in sync with adding of disks in
7281 # LUInstanceSetParams
7282 for idx, device in enumerate(instance.disks):
7283 if to_skip and idx in to_skip:
7284 continue
7285 logging.info("Creating volume %s for instance %s",
7286 device.iv_name, instance.name)
7288 for node in all_nodes:
7289 f_create = node == pnode
7290 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7293 def _RemoveDisks(lu, instance, target_node=None):
7294 """Remove all disks for an instance.
7296 This abstracts away some work from `AddInstance()` and
7297 `RemoveInstance()`. Note that in case some of the devices couldn't
7298 be removed, the removal will continue with the other ones (compare
7299 with `_CreateDisks()`).
7301 @type lu: L{LogicalUnit}
7302 @param lu: the logical unit on whose behalf we execute
7303 @type instance: L{objects.Instance}
7304 @param instance: the instance whose disks we should remove
7305 @type target_node: string
7306 @param target_node: used to override the node on which to remove the disks
7308 @return: the success of the removal
7311 logging.info("Removing block devices for instance %s", instance.name)
7313 all_result = True
7314 for device in instance.disks:
7315 if target_node:
7316 edata = [(target_node, device)]
7317 else:
7318 edata = device.ComputeNodeTree(instance.primary_node)
7319 for node, disk in edata:
7320 lu.cfg.SetDiskID(disk, node)
7321 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7322 if msg:
7323 lu.LogWarning("Could not remove block device %s on node %s,"
7324 " continuing anyway: %s", device.iv_name, node, msg)
7325 all_result = False
7327 if instance.disk_template == constants.DT_FILE:
7328 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7329 if target_node:
7330 tgt = target_node
7331 else:
7332 tgt = instance.primary_node
7333 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7334 if result.fail_msg:
7335 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7336 file_storage_dir, instance.primary_node, result.fail_msg)
7337 all_result = False
7339 return all_result
7342 def _ComputeDiskSizePerVG(disk_template, disks):
7343 """Compute disk size requirements in the volume group
7346 def _compute(disks, payload):
7347 """Universal algorithm.
7350 vgs = {}
7351 for disk in disks:
7352 vgs[disk[constants.IDISK_VG]] = \
7353 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7355 return vgs
7357 # Required free disk space as a function of disk and swap space
7358 req_size_dict = {
7359 constants.DT_DISKLESS: {},
7360 constants.DT_PLAIN: _compute(disks, 0),
7361 # 128 MB are added for drbd metadata for each disk
7362 constants.DT_DRBD8: _compute(disks, 128),
7363 constants.DT_FILE: {},
7364 constants.DT_SHARED_FILE: {},
7365 }
7367 if disk_template not in req_size_dict:
7368 raise errors.ProgrammerError("Disk template '%s' size requirement"
7369 " is unknown" % disk_template)
7371 return req_size_dict[disk_template]
7374 def _ComputeDiskSize(disk_template, disks):
7375 """Compute disk size requirements in the volume group
7378 # Required free disk space as a function of disk and swap space
7379 req_size_dict = {
7380 constants.DT_DISKLESS: None,
7381 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7382 # 128 MB are added for drbd metadata for each disk
7383 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7384 constants.DT_FILE: None,
7385 constants.DT_SHARED_FILE: 0,
7386 constants.DT_BLOCK: 0,
7387 }
7389 if disk_template not in req_size_dict:
7390 raise errors.ProgrammerError("Disk template '%s' size requirement"
7391 " is unknown" % disk_template)
7393 return req_size_dict[disk_template]
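# A quick example: for two DRBD8 disks of 10240 MiB and 2048 MiB,
# _ComputeDiskSize(constants.DT_DRBD8, disks) returns
# (10240 + 128) + (2048 + 128) = 12544 MiB, i.e. each disk pays an extra
# 128 MiB for its DRBD metadata volume.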
7396 def _FilterVmNodes(lu, nodenames):
7397 """Filters out non-vm_capable nodes from a list.
7399 @type lu: L{LogicalUnit}
7400 @param lu: the logical unit for which we check
7401 @type nodenames: list
7402 @param nodenames: the list of nodes on which we should check
7404 @return: the list of vm-capable nodes
7407 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7408 return [name for name in nodenames if name not in vm_nodes]
7411 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7412 """Hypervisor parameter validation.
7414 This function abstract the hypervisor parameter validation to be
7415 used in both instance create and instance modify.
7417 @type lu: L{LogicalUnit}
7418 @param lu: the logical unit for which we check
7419 @type nodenames: list
7420 @param nodenames: the list of nodes on which we should check
7421 @type hvname: string
7422 @param hvname: the name of the hypervisor we should use
7423 @type hvparams: dict
7424 @param hvparams: the parameters which we need to check
7425 @raise errors.OpPrereqError: if the parameters are not valid
7428 nodenames = _FilterVmNodes(lu, nodenames)
7429 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7430 hvname,
7431 hvparams)
7432 for node in nodenames:
7433 info = hvinfo[node]
7434 if info.offline:
7435 continue
7436 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7439 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7440 """OS parameters validation.
7442 @type lu: L{LogicalUnit}
7443 @param lu: the logical unit for which we check
7444 @type required: boolean
7445 @param required: whether the validation should fail if the OS is not
7447 @type nodenames: list
7448 @param nodenames: the list of nodes on which we should check
7449 @type osname: string
7450 @param osname: the name of the OS we should use
7451 @type osparams: dict
7452 @param osparams: the parameters which we need to check
7453 @raise errors.OpPrereqError: if the parameters are not valid
7456 nodenames = _FilterVmNodes(lu, nodenames)
7457 result = lu.rpc.call_os_validate(required, nodenames, osname,
7458 [constants.OS_VALIDATE_PARAMETERS],
7459 osparams)
7460 for node, nres in result.items():
7461 # we don't check for offline cases since this should be run only
7462 # against the master node and/or an instance's nodes
7463 nres.Raise("OS Parameters validation failed on node %s" % node)
7464 if not nres.payload:
7465 lu.LogInfo("OS %s not found on node %s, validation skipped",
7469 class LUInstanceCreate(LogicalUnit):
7470 """Create an instance.
7473 HPATH = "instance-add"
7474 HTYPE = constants.HTYPE_INSTANCE
7477 def CheckArguments(self):
7481 # do not require name_check to ease forward/backward compatibility
7483 if self.op.no_install and self.op.start:
7484 self.LogInfo("No-installation mode selected, disabling startup")
7485 self.op.start = False
7486 # validate/normalize the instance name
7487 self.op.instance_name = \
7488 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7490 if self.op.ip_check and not self.op.name_check:
7491 # TODO: make the ip check more flexible and not depend on the name check
7492 raise errors.OpPrereqError("Cannot do IP address check without a name"
7493 " check", errors.ECODE_INVAL)
7495 # check nics' parameter names
7496 for nic in self.op.nics:
7497 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7499 # check disks. parameter names and consistent adopt/no-adopt strategy
7500 has_adopt = has_no_adopt = False
7501 for disk in self.op.disks:
7502 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7503 if constants.IDISK_ADOPT in disk:
7504 has_adopt = True
7505 else:
7506 has_no_adopt = True
7507 if has_adopt and has_no_adopt:
7508 raise errors.OpPrereqError("Either all disks are adopted or none is",
7511 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7512 raise errors.OpPrereqError("Disk adoption is not supported for the"
7513 " '%s' disk template" %
7514 self.op.disk_template,
7515 errors.ECODE_INVAL)
7516 if self.op.iallocator is not None:
7517 raise errors.OpPrereqError("Disk adoption not allowed with an"
7518 " iallocator script", errors.ECODE_INVAL)
7519 if self.op.mode == constants.INSTANCE_IMPORT:
7520 raise errors.OpPrereqError("Disk adoption not allowed for"
7521 " instance import", errors.ECODE_INVAL)
7523 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7524 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7525 " but no 'adopt' parameter given" %
7526 self.op.disk_template,
7527 errors.ECODE_INVAL)
7529 self.adopt_disks = has_adopt
7531 # instance name verification
7532 if self.op.name_check:
7533 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7534 self.op.instance_name = self.hostname1.name
7535 # used in CheckPrereq for ip ping check
7536 self.check_ip = self.hostname1.ip
7538 self.check_ip = None
7540 # file storage checks
7541 if (self.op.file_driver and
7542 not self.op.file_driver in constants.FILE_DRIVER):
7543 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7544 self.op.file_driver, errors.ECODE_INVAL)
7546 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7547 raise errors.OpPrereqError("File storage directory path not absolute",
7550 ### Node/iallocator related checks
7551 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7553 if self.op.pnode is not None:
7554 if self.op.disk_template in constants.DTS_INT_MIRROR:
7555 if self.op.snode is None:
7556 raise errors.OpPrereqError("The networked disk templates need"
7557 " a mirror node", errors.ECODE_INVAL)
7559 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7561 self.op.snode = None
7563 self._cds = _GetClusterDomainSecret()
7565 if self.op.mode == constants.INSTANCE_IMPORT:
7566 # On import force_variant must be True, because if we forced it at
7567 # initial install, our only chance when importing it back is that it
7569 self.op.force_variant = True
7571 if self.op.no_install:
7572 self.LogInfo("No-installation mode has no effect during import")
7574 elif self.op.mode == constants.INSTANCE_CREATE:
7575 if self.op.os_type is None:
7576 raise errors.OpPrereqError("No guest OS specified",
7578 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7579 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7580 " installation" % self.op.os_type,
7582 if self.op.disk_template is None:
7583 raise errors.OpPrereqError("No disk template specified",
7586 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7587 # Check handshake to ensure both clusters have the same domain secret
7588 src_handshake = self.op.source_handshake
7589 if not src_handshake:
7590 raise errors.OpPrereqError("Missing source handshake",
7593 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7596 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7599 # Load and check source CA
7600 self.source_x509_ca_pem = self.op.source_x509_ca
7601 if not self.source_x509_ca_pem:
7602 raise errors.OpPrereqError("Missing source X509 CA",
7606 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7608 except OpenSSL.crypto.Error, err:
7609 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7610 (err, ), errors.ECODE_INVAL)
7612 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7613 if errcode is not None:
7614 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7617 self.source_x509_ca = cert
7619 src_instance_name = self.op.source_instance_name
7620 if not src_instance_name:
7621 raise errors.OpPrereqError("Missing source instance name",
7624 self.source_instance_name = \
7625 netutils.GetHostname(name=src_instance_name).name
7628 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7629 self.op.mode, errors.ECODE_INVAL)
7631 def ExpandNames(self):
7632 """ExpandNames for CreateInstance.
7634 Figure out the right locks for instance creation.
7637 self.needed_locks = {}
7639 instance_name = self.op.instance_name
7640 # this is just a preventive check, but someone might still add this
7641 # instance in the meantime, and creation will fail at lock-add time
7642 if instance_name in self.cfg.GetInstanceList():
7643 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7644 instance_name, errors.ECODE_EXISTS)
7646 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7648 if self.op.iallocator:
7649 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7651 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7652 nodelist = [self.op.pnode]
7653 if self.op.snode is not None:
7654 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7655 nodelist.append(self.op.snode)
7656 self.needed_locks[locking.LEVEL_NODE] = nodelist
7658 # in case of import lock the source node too
7659 if self.op.mode == constants.INSTANCE_IMPORT:
7660 src_node = self.op.src_node
7661 src_path = self.op.src_path
7663 if src_path is None:
7664 self.op.src_path = src_path = self.op.instance_name
7666 if src_node is None:
7667 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7668 self.op.src_node = None
7669 if os.path.isabs(src_path):
7670 raise errors.OpPrereqError("Importing an instance from an absolute"
7671 " path requires a source node option",
7674 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7675 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7676 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7677 if not os.path.isabs(src_path):
7678 self.op.src_path = src_path = \
7679 utils.PathJoin(constants.EXPORT_DIR, src_path)
7681 def _RunAllocator(self):
7682 """Run the allocator based on input opcode.
7685 nics = [n.ToDict() for n in self.nics]
7686 ial = IAllocator(self.cfg, self.rpc,
7687 mode=constants.IALLOCATOR_MODE_ALLOC,
7688 name=self.op.instance_name,
7689 disk_template=self.op.disk_template,
7692 vcpus=self.be_full[constants.BE_VCPUS],
7693 mem_size=self.be_full[constants.BE_MEMORY],
7696 hypervisor=self.op.hypervisor,
7699 ial.Run(self.op.iallocator)
7702 raise errors.OpPrereqError("Can't compute nodes using"
7703 " iallocator '%s': %s" %
7704 (self.op.iallocator, ial.info),
7706 if len(ial.result) != ial.required_nodes:
7707 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7708 " of nodes (%s), required %s" %
7709 (self.op.iallocator, len(ial.result),
7710 ial.required_nodes), errors.ECODE_FAULT)
7711 self.op.pnode = ial.result[0]
7712 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7713 self.op.instance_name, self.op.iallocator,
7714 utils.CommaJoin(ial.result))
7715 if ial.required_nodes == 2:
7716 self.op.snode = ial.result[1]
7718 def BuildHooksEnv(self):
7721 This runs on master, primary and secondary nodes of the instance.
7725 "ADD_MODE": self.op.mode,
7727 if self.op.mode == constants.INSTANCE_IMPORT:
7728 env["SRC_NODE"] = self.op.src_node
7729 env["SRC_PATH"] = self.op.src_path
7730 env["SRC_IMAGES"] = self.src_images
7732 env.update(_BuildInstanceHookEnv(
7733 name=self.op.instance_name,
7734 primary_node=self.op.pnode,
7735 secondary_nodes=self.secondaries,
7736 status=self.op.start,
7737 os_type=self.op.os_type,
7738 memory=self.be_full[constants.BE_MEMORY],
7739 vcpus=self.be_full[constants.BE_VCPUS],
7740 nics=_NICListToTuple(self, self.nics),
7741 disk_template=self.op.disk_template,
7742 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7743 for d in self.disks],
7746 hypervisor_name=self.op.hypervisor,
7751 def BuildHooksNodes(self):
7752 """Build hooks nodes.
7755 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7758 def _ReadExportInfo(self):
7759 """Reads the export information from disk.
7761 It will override the opcode source node and path with the actual
7762 information, if these two were not specified before.
7764 @return: the export information
7767 assert self.op.mode == constants.INSTANCE_IMPORT
7769 src_node = self.op.src_node
7770 src_path = self.op.src_path
7772 if src_node is None:
7773 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7774 exp_list = self.rpc.call_export_list(locked_nodes)
7776 for node in exp_list:
7777 if exp_list[node].fail_msg:
7779 if src_path in exp_list[node].payload:
7781 self.op.src_node = src_node = node
7782 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7786 raise errors.OpPrereqError("No export found for relative path %s" %
7787 src_path, errors.ECODE_INVAL)
7789 _CheckNodeOnline(self, src_node)
7790 result = self.rpc.call_export_info(src_node, src_path)
7791 result.Raise("No export or invalid export found in dir %s" % src_path)
7793 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7794 if not export_info.has_section(constants.INISECT_EXP):
7795 raise errors.ProgrammerError("Corrupted export config",
7796 errors.ECODE_ENVIRON)
7798 ei_version = export_info.get(constants.INISECT_EXP, "version")
7799 if int(ei_version) != constants.EXPORT_VERSION:
7800 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7801 (ei_version, constants.EXPORT_VERSION),
7802 errors.ECODE_ENVIRON)
7805 def _ReadExportParams(self, einfo):
7806 """Use export parameters as defaults.
7808 If the opcode doesn't specify (i.e. override) some instance parameters,
7809 try to use them from the export information, if the export declares them.
7813 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7815 if self.op.disk_template is None:
7816 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7817 self.op.disk_template = einfo.get(constants.INISECT_INS,
7820 raise errors.OpPrereqError("No disk template specified and the export"
7821 " is missing the disk_template information",
7824 if not self.op.disks:
7825 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7827 # TODO: import the disk iv_name too
7828 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7829 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7830 disks.append({constants.IDISK_SIZE: disk_sz})
7831 self.op.disks = disks
7833 raise errors.OpPrereqError("No disk info specified and the export"
7834 " is missing the disk information",
7837 if (not self.op.nics and
7838 einfo.has_option(constants.INISECT_INS, "nic_count")):
7840 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7842 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7843 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7848 if (self.op.hypervisor is None and
7849 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7850 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7851 if einfo.has_section(constants.INISECT_HYP):
7852 # use the export parameters but do not override the ones
7853 # specified by the user
7854 for name, value in einfo.items(constants.INISECT_HYP):
7855 if name not in self.op.hvparams:
7856 self.op.hvparams[name] = value
7858 if einfo.has_section(constants.INISECT_BEP):
7859 # use the parameters, without overriding
7860 for name, value in einfo.items(constants.INISECT_BEP):
7861 if name not in self.op.beparams:
7862 self.op.beparams[name] = value
7864 # try to read the parameters old style, from the main section
7865 for name in constants.BES_PARAMETERS:
7866 if (name not in self.op.beparams and
7867 einfo.has_option(constants.INISECT_INS, name)):
7868 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7870 if einfo.has_section(constants.INISECT_OSP):
7871 # use the parameters, without overriding
7872 for name, value in einfo.items(constants.INISECT_OSP):
7873 if name not in self.op.osparams:
7874 self.op.osparams[name] = value
7876 def _RevertToDefaults(self, cluster):
7877 """Revert the instance parameters to the default values.
7881 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7882 for name in self.op.hvparams.keys():
7883 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7884 del self.op.hvparams[name]
7886 be_defs = cluster.SimpleFillBE({})
7887 for name in self.op.beparams.keys():
7888 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7889 del self.op.beparams[name]
7891 nic_defs = cluster.SimpleFillNIC({})
7892 for nic in self.op.nics:
7893 for name in constants.NICS_PARAMETERS:
7894 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7897 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7898 for name in self.op.osparams.keys():
7899 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7900 del self.op.osparams[name]
7902 def CheckPrereq(self):
7903 """Check prerequisites.
7906 if self.op.mode == constants.INSTANCE_IMPORT:
7907 export_info = self._ReadExportInfo()
7908 self._ReadExportParams(export_info)
7910 if (not self.cfg.GetVGName() and
7911 self.op.disk_template not in constants.DTS_NOT_LVM):
7912 raise errors.OpPrereqError("Cluster does not support lvm-based"
7913 " instances", errors.ECODE_STATE)
7915 if self.op.hypervisor is None:
7916 self.op.hypervisor = self.cfg.GetHypervisorType()
7918 cluster = self.cfg.GetClusterInfo()
7919 enabled_hvs = cluster.enabled_hypervisors
7920 if self.op.hypervisor not in enabled_hvs:
7921 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7922 " cluster (%s)" % (self.op.hypervisor,
7923 ",".join(enabled_hvs)),
7926 # check hypervisor parameter syntax (locally)
7927 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7928 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7930 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7931 hv_type.CheckParameterSyntax(filled_hvp)
7932 self.hv_full = filled_hvp
7933 # check that we don't specify global parameters on an instance
7934 _CheckGlobalHvParams(self.op.hvparams)
7936 # fill and remember the beparams dict
7937 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7938 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7940 # build os parameters
7941 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7943 # now that hvp/bep are in final format, let's reset to defaults,
7944 # if told to do so
7945 if self.op.identify_defaults:
7946 self._RevertToDefaults(cluster)
7950 for idx, nic in enumerate(self.op.nics):
7951 nic_mode_req = nic.get(constants.INIC_MODE, None)
7952 nic_mode = nic_mode_req
7953 if nic_mode is None:
7954 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7956 # in routed mode, for the first nic, the default ip is 'auto'
7957 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7958 default_ip_mode = constants.VALUE_AUTO
7960 default_ip_mode = constants.VALUE_NONE
7962 # ip validity checks
7963 ip = nic.get(constants.INIC_IP, default_ip_mode)
7964 if ip is None or ip.lower() == constants.VALUE_NONE:
7966 elif ip.lower() == constants.VALUE_AUTO:
7967 if not self.op.name_check:
7968 raise errors.OpPrereqError("IP address set to auto but name checks"
7969 " have been skipped",
7971 nic_ip = self.hostname1.ip
7973 if not netutils.IPAddress.IsValid(ip):
7974 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7978 # TODO: check the ip address for uniqueness
7979 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7980 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7983 # MAC address verification
7984 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7985 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7986 mac = utils.NormalizeAndValidateMac(mac)
7989 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7990 except errors.ReservationError:
7991 raise errors.OpPrereqError("MAC address %s already in use"
7992 " in cluster" % mac,
7993 errors.ECODE_NOTUNIQUE)
7995 # Build nic parameters
7996 link = nic.get(constants.INIC_LINK, None)
7999 nicparams[constants.NIC_MODE] = nic_mode_req
8001 nicparams[constants.NIC_LINK] = link
8003 check_params = cluster.SimpleFillNIC(nicparams)
8004 objects.NIC.CheckParameterSyntax(check_params)
8005 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8007 # disk checks/pre-build
8008 default_vg = self.cfg.GetVGName()
8010 for disk in self.op.disks:
8011 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8012 if mode not in constants.DISK_ACCESS_SET:
8013 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8014 mode, errors.ECODE_INVAL)
8015 size = disk.get(constants.IDISK_SIZE, None)
8017 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8020 except (TypeError, ValueError):
8021 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8024 data_vg = disk.get(constants.IDISK_VG, default_vg)
8026 constants.IDISK_SIZE: size,
8027 constants.IDISK_MODE: mode,
8028 constants.IDISK_VG: data_vg,
8029 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8031 if constants.IDISK_ADOPT in disk:
8032 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8033 self.disks.append(new_disk)
8035 if self.op.mode == constants.INSTANCE_IMPORT:
8037 # Check that the new instance doesn't have fewer disks than the export
8038 instance_disks = len(self.disks)
8039 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8040 if instance_disks < export_disks:
8041 raise errors.OpPrereqError("Not enough disks to import."
8042 " (instance: %d, export: %d)" %
8043 (instance_disks, export_disks),
8047 for idx in range(export_disks):
8048 option = 'disk%d_dump' % idx
8049 if export_info.has_option(constants.INISECT_INS, option):
8050 # FIXME: are the old os-es, disk sizes, etc. useful?
8051 export_name = export_info.get(constants.INISECT_INS, option)
8052 image = utils.PathJoin(self.op.src_path, export_name)
8053 disk_images.append(image)
8055 disk_images.append(False)
8057 self.src_images = disk_images
8059 old_name = export_info.get(constants.INISECT_INS, 'name')
8061 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8062 except (TypeError, ValueError), err:
8063 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8064 " an integer: %s" % str(err),
8066 if self.op.instance_name == old_name:
8067 for idx, nic in enumerate(self.nics):
8068 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8069 nic_mac_ini = 'nic%d_mac' % idx
8070 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8072 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8074 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8075 if self.op.ip_check:
8076 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8077 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8078 (self.check_ip, self.op.instance_name),
8079 errors.ECODE_NOTUNIQUE)
8081 #### mac address generation
8082 # By generating the MAC address here, both the allocator and the hooks get
8083 # the real, final MAC address rather than the 'auto' or 'generate' value.
8084 # There is a race condition between the generation and the instance object
8085 # creation, which means that we know the MAC is valid now, but we're not
8086 # sure it still will be when we actually add the instance. If things go bad,
8087 # adding the instance will abort because of a duplicate MAC, and the
8088 # creation job will fail.
8089 for nic in self.nics:
8090 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8091 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8095 if self.op.iallocator is not None:
8096 self._RunAllocator()
8098 #### node related checks
8100 # check primary node
8101 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8102 assert self.pnode is not None, \
8103 "Cannot retrieve locked node %s" % self.op.pnode
8105 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8106 pnode.name, errors.ECODE_STATE)
8108 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8109 pnode.name, errors.ECODE_STATE)
8110 if not pnode.vm_capable:
8111 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8112 " '%s'" % pnode.name, errors.ECODE_STATE)
8114 self.secondaries = []
8116 # mirror node verification
8117 if self.op.disk_template in constants.DTS_INT_MIRROR:
8118 if self.op.snode == pnode.name:
8119 raise errors.OpPrereqError("The secondary node cannot be the"
8120 " primary node", errors.ECODE_INVAL)
8121 _CheckNodeOnline(self, self.op.snode)
8122 _CheckNodeNotDrained(self, self.op.snode)
8123 _CheckNodeVmCapable(self, self.op.snode)
8124 self.secondaries.append(self.op.snode)
8126 nodenames = [pnode.name] + self.secondaries
8128 if not self.adopt_disks:
8129 # Check lv size requirements, if not adopting
8130 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8131 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8133 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8134 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8135 disk[constants.IDISK_ADOPT])
8136 for disk in self.disks])
8137 if len(all_lvs) != len(self.disks):
8138 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8140 for lv_name in all_lvs:
8142 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8143 # to ReserveLV use the same syntax
8144 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8145 except errors.ReservationError:
8146 raise errors.OpPrereqError("LV named %s used by another instance" %
8147 lv_name, errors.ECODE_NOTUNIQUE)
8149 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8150 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8152 node_lvs = self.rpc.call_lv_list([pnode.name],
8153 vg_names.payload.keys())[pnode.name]
8154 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8155 node_lvs = node_lvs.payload
8157 delta = all_lvs.difference(node_lvs.keys())
8159 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8160 utils.CommaJoin(delta),
8162 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8164 raise errors.OpPrereqError("Online logical volumes found, cannot"
8165 " adopt: %s" % utils.CommaJoin(online_lvs),
8167 # update the size of each disk based on what is found
8168 for dsk in self.disks:
8169 dsk[constants.IDISK_SIZE] = \
8170 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8171 dsk[constants.IDISK_ADOPT])][0]))
8173 elif self.op.disk_template == constants.DT_BLOCK:
8174 # Normalize and de-duplicate device paths
8175 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8176 for disk in self.disks])
8177 if len(all_disks) != len(self.disks):
8178 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8180 baddisks = [d for d in all_disks
8181 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8183 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8184 " cannot be adopted" %
8185 (", ".join(baddisks),
8186 constants.ADOPTABLE_BLOCKDEV_ROOT),
8189 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8190 list(all_disks))[pnode.name]
8191 node_disks.Raise("Cannot get block device information from node %s" %
8193 node_disks = node_disks.payload
8194 delta = all_disks.difference(node_disks.keys())
8196 raise errors.OpPrereqError("Missing block device(s): %s" %
8197 utils.CommaJoin(delta),
8199 for dsk in self.disks:
8200 dsk[constants.IDISK_SIZE] = \
8201 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8203 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8205 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8206 # check OS parameters (remotely)
8207 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8209 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8211 # memory check on primary node
8213 _CheckNodeFreeMemory(self, self.pnode.name,
8214 "creating instance %s" % self.op.instance_name,
8215 self.be_full[constants.BE_MEMORY],
8218 self.dry_run_result = list(nodenames)
8220 def Exec(self, feedback_fn):
8221 """Create and add the instance to the cluster.
8224 instance = self.op.instance_name
8225 pnode_name = self.pnode.name
8227 ht_kind = self.op.hypervisor
8228 if ht_kind in constants.HTS_REQ_PORT:
8229 network_port = self.cfg.AllocatePort()
8233 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8234 # this is needed because os.path.join does not accept None arguments
8235 if self.op.file_storage_dir is None:
8236 string_file_storage_dir = ""
8238 string_file_storage_dir = self.op.file_storage_dir
8240 # build the full file storage dir path
8241 if self.op.disk_template == constants.DT_SHARED_FILE:
8242 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8244 get_fsd_fn = self.cfg.GetFileStorageDir
8246 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8247 string_file_storage_dir, instance)
8249 file_storage_dir = ""
8251 disks = _GenerateDiskTemplate(self,
8252 self.op.disk_template,
8253 instance, pnode_name,
8257 self.op.file_driver,
8261 iobj = objects.Instance(name=instance, os=self.op.os_type,
8262 primary_node=pnode_name,
8263 nics=self.nics, disks=disks,
8264 disk_template=self.op.disk_template,
8266 network_port=network_port,
8267 beparams=self.op.beparams,
8268 hvparams=self.op.hvparams,
8269 hypervisor=self.op.hypervisor,
8270 osparams=self.op.osparams,
8273 if self.adopt_disks:
8274 if self.op.disk_template == constants.DT_PLAIN:
8275 # rename LVs to the newly-generated names; we need to construct
8276 # 'fake' LV disks with the old data, plus the new unique_id
8277 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8279 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8280 rename_to.append(t_dsk.logical_id)
8281 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8282 self.cfg.SetDiskID(t_dsk, pnode_name)
8283 result = self.rpc.call_blockdev_rename(pnode_name,
8284 zip(tmp_disks, rename_to))
8285 result.Raise("Failed to rename adopted LVs")
8287 feedback_fn("* creating instance disks...")
8289 _CreateDisks(self, iobj)
8290 except errors.OpExecError:
8291 self.LogWarning("Device creation failed, reverting...")
8293 _RemoveDisks(self, iobj)
8295 self.cfg.ReleaseDRBDMinors(instance)
8298 feedback_fn("adding instance %s to cluster config" % instance)
8300 self.cfg.AddInstance(iobj, self.proc.GetECId())
8302 # Declare that we don't want to remove the instance lock anymore, as we've
8303 # added the instance to the config
8304 del self.remove_locks[locking.LEVEL_INSTANCE]
8306 if self.op.mode == constants.INSTANCE_IMPORT:
8307 # Release unused nodes
8308 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8311 _ReleaseLocks(self, locking.LEVEL_NODE)
8314 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8315 feedback_fn("* wiping instance disks...")
8317 _WipeDisks(self, iobj)
8318 except errors.OpExecError, err:
8319 logging.exception("Wiping disks failed")
8320 self.LogWarning("Wiping instance disks failed (%s)", err)
8324 # Something is already wrong with the disks, don't do anything else
8326 elif self.op.wait_for_sync:
8327 disk_abort = not _WaitForSync(self, iobj)
8328 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8329 # make sure the disks are not degraded (still sync-ing is ok)
8331 feedback_fn("* checking mirrors status")
8332 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8337 _RemoveDisks(self, iobj)
8338 self.cfg.RemoveInstance(iobj.name)
8339 # Make sure the instance lock gets removed
8340 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8341 raise errors.OpExecError("There are some degraded disks for"
8344 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8345 if self.op.mode == constants.INSTANCE_CREATE:
8346 if not self.op.no_install:
8347 feedback_fn("* running the instance OS create scripts...")
8348 # FIXME: pass debug option from opcode to backend
8349 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8350 self.op.debug_level)
8351 result.Raise("Could not add os for instance %s"
8352 " on node %s" % (instance, pnode_name))
8354 elif self.op.mode == constants.INSTANCE_IMPORT:
8355 feedback_fn("* running the instance OS import scripts...")
8359 for idx, image in enumerate(self.src_images):
8363 # FIXME: pass debug option from opcode to backend
8364 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8365 constants.IEIO_FILE, (image, ),
8366 constants.IEIO_SCRIPT,
8367 (iobj.disks[idx], idx),
8369 transfers.append(dt)
8372 masterd.instance.TransferInstanceData(self, feedback_fn,
8373 self.op.src_node, pnode_name,
8374 self.pnode.secondary_ip,
8376 if not compat.all(import_result):
8377 self.LogWarning("Some disks for instance %s on node %s were not"
8378 " imported successfully" % (instance, pnode_name))
8380 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8381 feedback_fn("* preparing remote import...")
8382 # The source cluster will stop the instance before attempting to make a
8383 # connection. In some cases stopping an instance can take a long time,
8384 # hence the shutdown timeout is added to the connection timeout.
8385 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8386 self.op.source_shutdown_timeout)
8387 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8389 assert iobj.primary_node == self.pnode.name
8391 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8392 self.source_x509_ca,
8393 self._cds, timeouts)
8394 if not compat.all(disk_results):
8395 # TODO: Should the instance still be started, even if some disks
8396 # failed to import (valid for local imports, too)?
8397 self.LogWarning("Some disks for instance %s on node %s were not"
8398 " imported successfully" % (instance, pnode_name))
8400 # Run rename script on newly imported instance
8401 assert iobj.name == instance
8402 feedback_fn("Running rename script for %s" % instance)
8403 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8404 self.source_instance_name,
8405 self.op.debug_level)
8407 self.LogWarning("Failed to run rename script for %s on node"
8408 " %s: %s" % (instance, pnode_name, result.fail_msg))
8411 # also checked in the prereq part
8412 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8416 iobj.admin_up = True
8417 self.cfg.Update(iobj, feedback_fn)
8418 logging.info("Starting instance %s on node %s", instance, pnode_name)
8419 feedback_fn("* starting instance...")
8420 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8421 result.Raise("Could not start instance")
8423 return list(iobj.all_nodes)
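# Illustrative sketch (editorial addition, not referenced by the code): the
# per-disk dictionaries built in LUInstanceCreate.CheckPrereq above and
# consumed by Exec carry the constants.IDISK_* keys; sizes are in mebibytes,
# and the volume group names below are assumptions for illustration.
_EXAMPLE_CREATE_DISK = {
  constants.IDISK_SIZE: 10240,
  constants.IDISK_MODE: constants.DISK_RDWR,
  constants.IDISK_VG: "xenvg",
  constants.IDISK_METAVG: "xenvg",
  }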
8426 class LUInstanceConsole(NoHooksLU):
8427 """Connect to an instance's console.
8429 This is somewhat special in that it returns the command line that
8430 you need to run on the master node in order to connect to the console.
8436 def ExpandNames(self):
8437 self._ExpandAndLockInstance()
8439 def CheckPrereq(self):
8440 """Check prerequisites.
8442 This checks that the instance is in the cluster.
8445 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8446 assert self.instance is not None, \
8447 "Cannot retrieve locked instance %s" % self.op.instance_name
8448 _CheckNodeOnline(self, self.instance.primary_node)
8450 def Exec(self, feedback_fn):
8451 """Connect to the console of an instance
8454 instance = self.instance
8455 node = instance.primary_node
8457 node_insts = self.rpc.call_instance_list([node],
8458 [instance.hypervisor])[node]
8459 node_insts.Raise("Can't get node information from %s" % node)
8461 if instance.name not in node_insts.payload:
8462 if instance.admin_up:
8463 state = constants.INSTST_ERRORDOWN
8465 state = constants.INSTST_ADMINDOWN
8466 raise errors.OpExecError("Instance %s is not running (state %s)" %
8467 (instance.name, state))
8469 logging.debug("Connecting to console of %s on %s", instance.name, node)
8471 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8474 def _GetInstanceConsole(cluster, instance):
8475 """Returns console information for an instance.
8477 @type cluster: L{objects.Cluster}
8478 @type instance: L{objects.Instance}
8482 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8483 # beparams and hvparams are passed separately, to avoid editing the
8484 # instance and then saving the defaults in the instance itself.
8485 hvparams = cluster.FillHV(instance)
8486 beparams = cluster.FillBE(instance)
8487 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8489 assert console.instance == instance.name
8490 assert console.Validate()
8492 return console.ToDict()
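# Illustrative sketch (editorial addition): the dictionary returned by
# _GetInstanceConsole above is the serialized objects.InstanceConsole; for an
# SSH-based (e.g. Xen PVM) console it looks roughly like this. The exact field
# set depends on the console kind and is an assumption here, not taken from
# this module.
_EXAMPLE_CONSOLE_DICT = {
  "instance": "instance1.example.com",
  "kind": constants.CONS_SSH,
  "host": "node1.example.com",
  "user": "root",
  "command": ["xm", "console", "instance1.example.com"],
  }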
8495 class LUInstanceReplaceDisks(LogicalUnit):
8496 """Replace the disks of an instance.
8499 HPATH = "mirrors-replace"
8500 HTYPE = constants.HTYPE_INSTANCE
8503 def CheckArguments(self):
8504 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8507 def ExpandNames(self):
8508 self._ExpandAndLockInstance()
8510 assert locking.LEVEL_NODE not in self.needed_locks
8511 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8513 assert self.op.iallocator is None or self.op.remote_node is None, \
8514 "Conflicting options"
8516 if self.op.remote_node is not None:
8517 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8519 # Warning: do not remove the locking of the new secondary here
8520 # unless DRBD8.AddChildren is changed to work in parallel;
8521 # currently it doesn't since parallel invocations of
8522 # FindUnusedMinor will conflict
8523 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8524 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8526 self.needed_locks[locking.LEVEL_NODE] = []
8527 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8529 if self.op.iallocator is not None:
8530 # iallocator will select a new node in the same group
8531 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8533 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8534 self.op.iallocator, self.op.remote_node,
8535 self.op.disks, False, self.op.early_release)
8537 self.tasklets = [self.replacer]
8539 def DeclareLocks(self, level):
8540 if level == locking.LEVEL_NODEGROUP:
8541 assert self.op.remote_node is None
8542 assert self.op.iallocator is not None
8543 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8545 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8546 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8547 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8549 elif level == locking.LEVEL_NODE:
8550 if self.op.iallocator is not None:
8551 assert self.op.remote_node is None
8552 assert not self.needed_locks[locking.LEVEL_NODE]
8554 # Lock member nodes of all locked groups
8555 self.needed_locks[locking.LEVEL_NODE] = [node_name
8556 for group_uuid in self.acquired_locks[locking.LEVEL_NODEGROUP]
8557 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8559 self._LockInstancesNodes()
8561 def BuildHooksEnv(self):
8564 This runs on the master, the primary and all the secondaries.
8567 instance = self.replacer.instance
8569 "MODE": self.op.mode,
8570 "NEW_SECONDARY": self.op.remote_node,
8571 "OLD_SECONDARY": instance.secondary_nodes[0],
8573 env.update(_BuildInstanceHookEnvByObject(self, instance))
8576 def BuildHooksNodes(self):
8577 """Build hooks nodes.
8580 instance = self.replacer.instance
8582 self.cfg.GetMasterNode(),
8583 instance.primary_node,
8585 if self.op.remote_node is not None:
8586 nl.append(self.op.remote_node)
8589 def CheckPrereq(self):
8590 """Check prerequisites.
8593 assert (locking.LEVEL_NODEGROUP in self.acquired_locks or
8594 self.op.iallocator is None)
8596 if locking.LEVEL_NODEGROUP in self.acquired_locks:
8597 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8598 prevgroups = self.acquired_locks[locking.LEVEL_NODEGROUP]
8599 if prevgroups != groups:
8600 raise errors.OpExecError("Node groups used by instance '%s' changed"
8601 " since lock was acquired, current list is %r,"
8602 " used to be '%s'" %
8603 (self.op.instance_name,
8604 utils.CommaJoin(groups),
8605 utils.CommaJoin(prevgroups)))
8607 return LogicalUnit.CheckPrereq(self)
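# Illustrative sketch (editorial addition): valid and invalid argument
# combinations for TLReplaceDisks.CheckArguments below, as (mode, remote_node,
# iallocator) tuples. Only secondary-change mode accepts a new node, and then
# exactly one of remote_node/iallocator must be given; the node and allocator
# names are made up for illustration.
_EXAMPLE_REPLACE_DISK_ARGS = [
  (constants.REPLACE_DISK_PRI, None, None),                  # accepted
  (constants.REPLACE_DISK_SEC, None, None),                  # accepted
  (constants.REPLACE_DISK_CHG, "node3.example.com", None),   # accepted
  (constants.REPLACE_DISK_CHG, None, "hail"),                # accepted
  (constants.REPLACE_DISK_CHG, None, None),                  # rejected
  (constants.REPLACE_DISK_PRI, "node3.example.com", None),   # rejected
  ]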
8610 class TLReplaceDisks(Tasklet):
8611 """Replaces disks for an instance.
8613 Note: Locking is not within the scope of this class.
8616 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8617 disks, delay_iallocator, early_release):
8618 """Initializes this class.
8621 Tasklet.__init__(self, lu)
8624 self.instance_name = instance_name
8626 self.iallocator_name = iallocator_name
8627 self.remote_node = remote_node
8629 self.delay_iallocator = delay_iallocator
8630 self.early_release = early_release
8633 self.instance = None
8634 self.new_node = None
8635 self.target_node = None
8636 self.other_node = None
8637 self.remote_node_info = None
8638 self.node_secondary_ip = None
8641 def CheckArguments(mode, remote_node, iallocator):
8642 """Helper function for users of this class.
8645 # check for valid parameter combination
8646 if mode == constants.REPLACE_DISK_CHG:
8647 if remote_node is None and iallocator is None:
8648 raise errors.OpPrereqError("When changing the secondary either an"
8649 " iallocator script must be used or the"
8650 " new node given", errors.ECODE_INVAL)
8652 if remote_node is not None and iallocator is not None:
8653 raise errors.OpPrereqError("Give either the iallocator or the new"
8654 " secondary, not both", errors.ECODE_INVAL)
8656 elif remote_node is not None or iallocator is not None:
8657 # Not replacing the secondary
8658 raise errors.OpPrereqError("The iallocator and new node options can"
8659 " only be used when changing the"
8660 " secondary node", errors.ECODE_INVAL)
8663 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8664 """Compute a new secondary node using an IAllocator.
8667 ial = IAllocator(lu.cfg, lu.rpc,
8668 mode=constants.IALLOCATOR_MODE_RELOC,
8670 relocate_from=relocate_from)
8672 ial.Run(iallocator_name)
8675 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8676 " %s" % (iallocator_name, ial.info),
8679 if len(ial.result) != ial.required_nodes:
8680 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8681 " of nodes (%s), required %s" %
8683 len(ial.result), ial.required_nodes),
8686 remote_node_name = ial.result[0]
8688 lu.LogInfo("Selected new secondary for instance '%s': %s",
8689 instance_name, remote_node_name)
8691 return remote_node_name
8693 def _FindFaultyDisks(self, node_name):
8694 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8697 def _CheckDisksActivated(self, instance):
8698 """Checks if the instance disks are activated.
8700 @param instance: The instance whose disks should be checked
8701 @return: True if they are activated, False otherwise
8704 nodes = instance.all_nodes
8706 for idx, dev in enumerate(instance.disks):
8708 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8709 self.cfg.SetDiskID(dev, node)
8711 result = self.rpc.call_blockdev_find(node, dev)
8715 elif result.fail_msg or not result.payload:
8720 def CheckPrereq(self):
8721 """Check prerequisites.
8723 This checks that the instance is in the cluster.
8726 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8727 assert instance is not None, \
8728 "Cannot retrieve locked instance %s" % self.instance_name
8730 if instance.disk_template != constants.DT_DRBD8:
8731 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8732 " instances", errors.ECODE_INVAL)
8734 if len(instance.secondary_nodes) != 1:
8735 raise errors.OpPrereqError("The instance has a strange layout,"
8736 " expected one secondary but found %d" %
8737 len(instance.secondary_nodes),
8740 if not self.delay_iallocator:
8741 self._CheckPrereq2()
8743 def _CheckPrereq2(self):
8744 """Check prerequisites, second part.
8746 This function should always be part of CheckPrereq. It was separated and is
8747 now called from Exec because during node evacuation iallocator was only
8748 called with an unmodified cluster model, not taking planned changes into
8749 account.
8752 instance = self.instance
8753 secondary_node = instance.secondary_nodes[0]
8755 if self.iallocator_name is None:
8756 remote_node = self.remote_node
8758 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8759 instance.name, instance.secondary_nodes)
8761 if remote_node is None:
8762 self.remote_node_info = None
8764 assert remote_node in self.lu.acquired_locks[locking.LEVEL_NODE], \
8765 "Remote node '%s' is not locked" % remote_node
8767 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8768 assert self.remote_node_info is not None, \
8769 "Cannot retrieve locked node %s" % remote_node
8771 if remote_node == self.instance.primary_node:
8772 raise errors.OpPrereqError("The specified node is the primary node of"
8773 " the instance", errors.ECODE_INVAL)
8775 if remote_node == secondary_node:
8776 raise errors.OpPrereqError("The specified node is already the"
8777 " secondary node of the instance",
8780 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8781 constants.REPLACE_DISK_CHG):
8782 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8785 if self.mode == constants.REPLACE_DISK_AUTO:
8786 if not self._CheckDisksActivated(instance):
8787 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8788 " first" % self.instance_name,
8790 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8791 faulty_secondary = self._FindFaultyDisks(secondary_node)
8793 if faulty_primary and faulty_secondary:
8794 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8795 " one node and can not be repaired"
8796 " automatically" % self.instance_name,
8800 self.disks = faulty_primary
8801 self.target_node = instance.primary_node
8802 self.other_node = secondary_node
8803 check_nodes = [self.target_node, self.other_node]
8804 elif faulty_secondary:
8805 self.disks = faulty_secondary
8806 self.target_node = secondary_node
8807 self.other_node = instance.primary_node
8808 check_nodes = [self.target_node, self.other_node]
8814 # Non-automatic modes
8815 if self.mode == constants.REPLACE_DISK_PRI:
8816 self.target_node = instance.primary_node
8817 self.other_node = secondary_node
8818 check_nodes = [self.target_node, self.other_node]
8820 elif self.mode == constants.REPLACE_DISK_SEC:
8821 self.target_node = secondary_node
8822 self.other_node = instance.primary_node
8823 check_nodes = [self.target_node, self.other_node]
8825 elif self.mode == constants.REPLACE_DISK_CHG:
8826 self.new_node = remote_node
8827 self.other_node = instance.primary_node
8828 self.target_node = secondary_node
8829 check_nodes = [self.new_node, self.other_node]
8831 _CheckNodeNotDrained(self.lu, remote_node)
8832 _CheckNodeVmCapable(self.lu, remote_node)
8834 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8835 assert old_node_info is not None
8836 if old_node_info.offline and not self.early_release:
8837 # doesn't make sense to delay the release
8838 self.early_release = True
8839 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8840 " early-release mode", secondary_node)
8843 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8846 # If not specified all disks should be replaced
8848 self.disks = range(len(self.instance.disks))
8850 for node in check_nodes:
8851 _CheckNodeOnline(self.lu, node)
8853 touched_nodes = frozenset(node_name for node_name in [self.new_node,
8856 if node_name is not None)
8858 # Release unneeded node locks
8859 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8861 # Release any owned node group
8862 if self.lu.context.glm.is_owned(locking.LEVEL_NODEGROUP):
8863 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
8865 # Check whether disks are valid
8866 for disk_idx in self.disks:
8867 instance.FindDisk(disk_idx)
8869 # Get secondary node IP addresses
8870 self.node_secondary_ip = \
8871 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8872 for node_name in touched_nodes)
8874 def Exec(self, feedback_fn):
8875 """Execute disk replacement.
8877 This dispatches the disk replacement to the appropriate handler.
8880 if self.delay_iallocator:
8881 self._CheckPrereq2()
8884 # Verify owned locks before starting operation
8885 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8886 assert set(owned_locks) == set(self.node_secondary_ip), \
8887 ("Incorrect node locks, owning %s, expected %s" %
8888 (owned_locks, self.node_secondary_ip.keys()))
8890 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_INSTANCE)
8891 assert list(owned_locks) == [self.instance_name], \
8892 "Instance '%s' not locked" % self.instance_name
8894 assert not self.lu.context.glm.is_owned(locking.LEVEL_NODEGROUP), \
8895 "Should not own any node group lock at this point"
8898 feedback_fn("No disks need replacement")
8901 feedback_fn("Replacing disk(s) %s for %s" %
8902 (utils.CommaJoin(self.disks), self.instance.name))
8904 activate_disks = (not self.instance.admin_up)
8906 # Activate the instance disks if we're replacing them on a down instance
8907 if activate_disks:
8908 _StartInstanceDisks(self.lu, self.instance, True)
8911 # Should we replace the secondary node?
8912 if self.new_node is not None:
8913 fn = self._ExecDrbd8Secondary
8915 fn = self._ExecDrbd8DiskOnly
8917 result = fn(feedback_fn)
8919 # Deactivate the instance disks if we're replacing them on a
8920 # down instance
8921 if activate_disks:
8922 _SafeShutdownInstanceDisks(self.lu, self.instance)
8925 # Verify owned locks
8926 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8927 nodes = frozenset(self.node_secondary_ip)
8928 assert ((self.early_release and not owned_locks) or
8929 (not self.early_release and not (set(owned_locks) - nodes))), \
8930 ("Not owning the correct locks, early_release=%s, owned=%r,"
8931 " nodes=%r" % (self.early_release, owned_locks, nodes))
8935 def _CheckVolumeGroup(self, nodes):
8936 self.lu.LogInfo("Checking volume groups")
8938 vgname = self.cfg.GetVGName()
8940 # Make sure volume group exists on all involved nodes
8941 results = self.rpc.call_vg_list(nodes)
8943 raise errors.OpExecError("Can't list volume groups on the nodes")
8947 res.Raise("Error checking node %s" % node)
8948 if vgname not in res.payload:
8949 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8952 def _CheckDisksExistence(self, nodes):
8953 # Check disk existence
8954 for idx, dev in enumerate(self.instance.disks):
8955 if idx not in self.disks:
8959 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8960 self.cfg.SetDiskID(dev, node)
8962 result = self.rpc.call_blockdev_find(node, dev)
8964 msg = result.fail_msg
8965 if msg or not result.payload:
8967 msg = "disk not found"
8968 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8971 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8972 for idx, dev in enumerate(self.instance.disks):
8973 if idx not in self.disks:
8976 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8979 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8981 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8982 " replace disks for instance %s" %
8983 (node_name, self.instance.name))
8985 def _CreateNewStorage(self, node_name):
8988 for idx, dev in enumerate(self.instance.disks):
8989 if idx not in self.disks:
8992 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8994 self.cfg.SetDiskID(dev, node_name)
8996 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8997 names = _GenerateUniqueNames(self.lu, lv_names)
8999 vg_data = dev.children[0].logical_id[0]
9000 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9001 logical_id=(vg_data, names[0]))
9002 vg_meta = dev.children[1].logical_id[0]
9003 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9004 logical_id=(vg_meta, names[1]))
9006 new_lvs = [lv_data, lv_meta]
9007 old_lvs = dev.children
9008 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9010 # we pass force_create=True to force the LVM creation
9011 for new_lv in new_lvs:
9012 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9013 _GetInstanceInfoText(self.instance), False)
9017 def _CheckDevices(self, node_name, iv_names):
9018 for name, (dev, _, _) in iv_names.iteritems():
9019 self.cfg.SetDiskID(dev, node_name)
9021 result = self.rpc.call_blockdev_find(node_name, dev)
9023 msg = result.fail_msg
9024 if msg or not result.payload:
9026 msg = "disk not found"
9027 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9030 if result.payload.is_degraded:
9031 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9033 def _RemoveOldStorage(self, node_name, iv_names):
9034 for name, (_, old_lvs, _) in iv_names.iteritems():
9035 self.lu.LogInfo("Remove logical volumes for %s" % name)
9038 self.cfg.SetDiskID(lv, node_name)
9040 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9042 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9043 hint="remove unused LVs manually")
9045 def _ExecDrbd8DiskOnly(self, feedback_fn):
9046 """Replace a disk on the primary or secondary for DRBD 8.
9048 The algorithm for replace is quite complicated:
9050 1. for each disk to be replaced:
9052 1. create new LVs on the target node with unique names
9053 1. detach old LVs from the drbd device
9054 1. rename old LVs to name_replaced.<time_t>
9055 1. rename new LVs to old LVs
9056 1. attach the new LVs (with the old names now) to the drbd device
9058 1. wait for sync across all devices
9060 1. for each modified disk:
9062 1. remove old LVs (which have the name name_replaced.<time_t>)
9064 Failures are not very well handled.
9069 # Step: check device activation
9070 self.lu.LogStep(1, steps_total, "Check device existence")
9071 self._CheckDisksExistence([self.other_node, self.target_node])
9072 self._CheckVolumeGroup([self.target_node, self.other_node])
9074 # Step: check other node consistency
9075 self.lu.LogStep(2, steps_total, "Check peer consistency")
9076 self._CheckDisksConsistency(self.other_node,
9077 self.other_node == self.instance.primary_node,
9080 # Step: create new storage
9081 self.lu.LogStep(3, steps_total, "Allocate new storage")
9082 iv_names = self._CreateNewStorage(self.target_node)
9084 # Step: for each lv, detach+rename*2+attach
9085 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9086 for dev, old_lvs, new_lvs in iv_names.itervalues():
9087 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9089 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9091 result.Raise("Can't detach drbd from local storage on node"
9092 " %s for device %s" % (self.target_node, dev.iv_name))
9094 #cfg.Update(instance)
9096 # ok, we created the new LVs, so now we know we have the needed
9097 # storage; as such, we proceed on the target node to rename
9098 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9099 # using the assumption that logical_id == physical_id (which in
9100 # turn is the unique_id on that node)
9102 # FIXME(iustin): use a better name for the replaced LVs
9103 temp_suffix = int(time.time())
9104 ren_fn = lambda d, suff: (d.physical_id[0],
9105 d.physical_id[1] + "_replaced-%s" % suff)
9107 # Build the rename list based on what LVs exist on the node
9108 rename_old_to_new = []
9109 for to_ren in old_lvs:
9110 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9111 if not result.fail_msg and result.payload:
9113 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9115 self.lu.LogInfo("Renaming the old LVs on the target node")
9116 result = self.rpc.call_blockdev_rename(self.target_node,
9118 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9120 # Now we rename the new LVs to the old LVs
9121 self.lu.LogInfo("Renaming the new LVs on the target node")
9122 rename_new_to_old = [(new, old.physical_id)
9123 for old, new in zip(old_lvs, new_lvs)]
9124 result = self.rpc.call_blockdev_rename(self.target_node,
9126 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9128 for old, new in zip(old_lvs, new_lvs):
9129 new.logical_id = old.logical_id
9130 self.cfg.SetDiskID(new, self.target_node)
9132 for disk in old_lvs:
9133 disk.logical_id = ren_fn(disk, temp_suffix)
9134 self.cfg.SetDiskID(disk, self.target_node)
9136 # Now that the new lvs have the old name, we can add them to the device
9137 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9138 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9140 msg = result.fail_msg
9142 for new_lv in new_lvs:
9143 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9146 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9147 hint=("cleanup manually the unused logical"
9149 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9151 dev.children = new_lvs
9153 self.cfg.Update(self.instance, feedback_fn)
9156 if self.early_release:
9157 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9159 self._RemoveOldStorage(self.target_node, iv_names)
9160 # WARNING: we release both node locks here, do not do other RPCs
9161 # than WaitForSync to the primary node
9162 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9163 names=[self.target_node, self.other_node])
9166 # This can fail as the old devices are degraded and _WaitForSync
9167 # does a combined result over all disks, so we don't check its return value
9168 self.lu.LogStep(cstep, steps_total, "Sync devices")
9170 _WaitForSync(self.lu, self.instance)
9172 # Check all devices manually
9173 self._CheckDevices(self.instance.primary_node, iv_names)
9175 # Step: remove old storage
9176 if not self.early_release:
9177 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9179 self._RemoveOldStorage(self.target_node, iv_names)
9181 def _ExecDrbd8Secondary(self, feedback_fn):
9182 """Replace the secondary node for DRBD 8.
9184 The algorithm for replace is quite complicated:
9185 - for all disks of the instance:
9186 - create new LVs on the new node with same names
9187 - shutdown the drbd device on the old secondary
9188 - disconnect the drbd network on the primary
9189 - create the drbd device on the new secondary
9190 - network attach the drbd on the primary, using an artifice:
9191 the drbd code for Attach() will connect to the network if it
9192 finds a device which is connected to the good local disks but
9193 not network enabled
9194 - wait for sync across all devices
9195 - remove all disks from the old secondary
9197 Failures are not very well handled.
9202 # Step: check device activation
9203 self.lu.LogStep(1, steps_total, "Check device existence")
9204 self._CheckDisksExistence([self.instance.primary_node])
9205 self._CheckVolumeGroup([self.instance.primary_node])
9207 # Step: check other node consistency
9208 self.lu.LogStep(2, steps_total, "Check peer consistency")
9209 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9211 # Step: create new storage
9212 self.lu.LogStep(3, steps_total, "Allocate new storage")
9213 for idx, dev in enumerate(self.instance.disks):
9214 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9215 (self.new_node, idx))
9216 # we pass force_create=True to force LVM creation
9217 for new_lv in dev.children:
9218 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9219 _GetInstanceInfoText(self.instance), False)
9221 # Step 4: drbd minors and drbd setup changes
9222 # after this, we must manually remove the drbd minors on both the
9223 # error and the success paths
9224 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9225 minors = self.cfg.AllocateDRBDMinor([self.new_node
9226 for dev in self.instance.disks],
9228 logging.debug("Allocated minors %r", minors)
9231 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9232 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9233 (self.new_node, idx))
9234 # create new devices on new_node; note that we create two IDs:
9235 # one without port, so the drbd will be activated without
9236 # networking information on the new node at this stage, and one
9237 # with network, for the later activation in step 4
9238 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9239 if self.instance.primary_node == o_node1:
9242 assert self.instance.primary_node == o_node2, "Three-node instance?"
9245 new_alone_id = (self.instance.primary_node, self.new_node, None,
9246 p_minor, new_minor, o_secret)
9247 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9248 p_minor, new_minor, o_secret)
9250 iv_names[idx] = (dev, dev.children, new_net_id)
9251 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9253 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9254 logical_id=new_alone_id,
9255 children=dev.children,
9258 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9259 _GetInstanceInfoText(self.instance), False)
9260 except errors.GenericError:
9261 self.cfg.ReleaseDRBDMinors(self.instance.name)
9264 # We have new devices, shutdown the drbd on the old secondary
9265 for idx, dev in enumerate(self.instance.disks):
9266 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9267 self.cfg.SetDiskID(dev, self.target_node)
9268 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9270 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9271 "node: %s" % (idx, msg),
9272 hint=("Please cleanup this device manually as"
9273 " soon as possible"))
9275 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9276 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9277 self.node_secondary_ip,
9278 self.instance.disks)\
9279 [self.instance.primary_node]
9281 msg = result.fail_msg
9283 # detaches didn't succeed (unlikely)
9284 self.cfg.ReleaseDRBDMinors(self.instance.name)
9285 raise errors.OpExecError("Can't detach the disks from the network on"
9286 " old node: %s" % (msg,))
9288 # if we managed to detach at least one, we update all the disks of
9289 # the instance to point to the new secondary
9290 self.lu.LogInfo("Updating instance configuration")
9291 for dev, _, new_logical_id in iv_names.itervalues():
9292 dev.logical_id = new_logical_id
9293 self.cfg.SetDiskID(dev, self.instance.primary_node)
9295 self.cfg.Update(self.instance, feedback_fn)
9297 # and now perform the drbd attach
9298 self.lu.LogInfo("Attaching primary drbds to new secondary"
9299 " (standalone => connected)")
9300 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9302 self.node_secondary_ip,
9303 self.instance.disks,
9306 for to_node, to_result in result.items():
9307 msg = to_result.fail_msg
9309 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9311 hint=("please do a gnt-instance info to see the"
9312 " status of disks"))
9314 if self.early_release:
9315 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9317 self._RemoveOldStorage(self.target_node, iv_names)
9318 # WARNING: we release all node locks here, do not do other RPCs
9319 # than WaitForSync to the primary node
9320 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9321 names=[self.instance.primary_node,
9326 # This can fail as the old devices are degraded and _WaitForSync
9327 # does a combined result over all disks, so we don't check its return value
9328 self.lu.LogStep(cstep, steps_total, "Sync devices")
9330 _WaitForSync(self.lu, self.instance)
9332 # Check all devices manually
9333 self._CheckDevices(self.instance.primary_node, iv_names)
9335 # Step: remove old storage
9336 if not self.early_release:
9337 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9338 self._RemoveOldStorage(self.target_node, iv_names)
9341 class LURepairNodeStorage(NoHooksLU):
9342 """Repairs the volume group on a node.
9347 def CheckArguments(self):
9348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9350 storage_type = self.op.storage_type
9352 if (constants.SO_FIX_CONSISTENCY not in
9353 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9354 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9355 " repaired" % storage_type,
9358 def ExpandNames(self):
9359 self.needed_locks = {
9360 locking.LEVEL_NODE: [self.op.node_name],
9363 def _CheckFaultyDisks(self, instance, node_name):
9364 """Ensure faulty disks abort the opcode or at least warn."""
9366 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9368 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9369 " node '%s'" % (instance.name, node_name),
9371 except errors.OpPrereqError, err:
9372 if self.op.ignore_consistency:
9373 self.proc.LogWarning(str(err.args[0]))
9377 def CheckPrereq(self):
9378 """Check prerequisites.
9381 # Check whether any instance on this node has faulty disks
9382 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9383 if not inst.admin_up:
9385 check_nodes = set(inst.all_nodes)
9386 check_nodes.discard(self.op.node_name)
9387 for inst_node_name in check_nodes:
9388 self._CheckFaultyDisks(inst, inst_node_name)
9390 def Exec(self, feedback_fn):
9391 feedback_fn("Repairing storage unit '%s' on %s ..." %
9392 (self.op.name, self.op.node_name))
9394 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9395 result = self.rpc.call_storage_execute(self.op.node_name,
9396 self.op.storage_type, st_args,
9398 constants.SO_FIX_CONSISTENCY)
9399 result.Raise("Failed to repair storage unit '%s' on %s" %
9400 (self.op.name, self.op.node_name))
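# Illustrative only: this LU is normally reached through the corresponding
# opcode (assumed to be OpRepairNodeStorage), e.g. from the command line via
# something like "gnt-node repair-storage node1.example.com lvm-vg xenvg";
# the node name and storage unit here are hypothetical examples.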
9403 class LUNodeEvacStrategy(NoHooksLU):
9404 """Computes the node evacuation strategy.
9409 def CheckArguments(self):
9410 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9412 def ExpandNames(self):
9413 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9414 self.needed_locks = locks = {}
9415 if self.op.remote_node is None:
9416 locks[locking.LEVEL_NODE] = locking.ALL_SET
9418 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9419 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9421 def Exec(self, feedback_fn):
9422 if self.op.remote_node is not None:
9424 for node in self.op.nodes:
9425 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9428 if i.primary_node == self.op.remote_node:
9429 raise errors.OpPrereqError("Node %s is the primary node of"
9430 " instance %s, cannot use it as"
9432 (self.op.remote_node, i.name),
9434 result.append([i.name, self.op.remote_node])
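# The strategy built here is a list of [instance_name, target_node] pairs,
# e.g. (illustrative names only):
#   [["inst1.example.com", "node3.example.com"],
#    ["inst2.example.com", "node3.example.com"]]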
9436 ial = IAllocator(self.cfg, self.rpc,
9437 mode=constants.IALLOCATOR_MODE_MEVAC,
9438 evac_nodes=self.op.nodes)
9439 ial.Run(self.op.iallocator, validate=True)
9441 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9447 class LUInstanceGrowDisk(LogicalUnit):
9448 """Grow a disk of an instance.
9452 HTYPE = constants.HTYPE_INSTANCE
9455 def ExpandNames(self):
9456 self._ExpandAndLockInstance()
9457 self.needed_locks[locking.LEVEL_NODE] = []
9458 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9460 def DeclareLocks(self, level):
9461 if level == locking.LEVEL_NODE:
9462 self._LockInstancesNodes()
9464 def BuildHooksEnv(self):
9467 This runs on the master, the primary and all the secondaries.
9471 "DISK": self.op.disk,
9472 "AMOUNT": self.op.amount,
9474 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9477 def BuildHooksNodes(self):
9478 """Build hooks nodes.
9481 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9484 def CheckPrereq(self):
9485 """Check prerequisites.
9487 This checks that the instance is in the cluster.
9490 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9491 assert instance is not None, \
9492 "Cannot retrieve locked instance %s" % self.op.instance_name
9493 nodenames = list(instance.all_nodes)
9494 for node in nodenames:
9495 _CheckNodeOnline(self, node)
9497 self.instance = instance
9499 if instance.disk_template not in constants.DTS_GROWABLE:
9500 raise errors.OpPrereqError("Instance's disk layout does not support"
9501 " growing", errors.ECODE_INVAL)
9503 self.disk = instance.FindDisk(self.op.disk)
9505 if instance.disk_template not in (constants.DT_FILE,
9506 constants.DT_SHARED_FILE):
9507 # TODO: check the free disk space for file, when that feature will be implemented
9509 _CheckNodesFreeDiskPerVG(self, nodenames,
9510 self.disk.ComputeGrowth(self.op.amount))
9512 def Exec(self, feedback_fn):
9513 """Execute disk grow.
9516 instance = self.instance
9519 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9521 raise errors.OpExecError("Cannot activate block device to grow")
9523 for node in instance.all_nodes:
9524 self.cfg.SetDiskID(disk, node)
9525 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9526 result.Raise("Grow request failed to node %s" % node)
9528 # TODO: Rewrite code to work properly
9529 # DRBD goes into sync mode for a short amount of time after executing the
9530 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9531 # calling "resize" in sync mode fails. Sleeping for a short amount of
9532 # time is a work-around.
9535 disk.RecordGrow(self.op.amount)
9536 self.cfg.Update(instance, feedback_fn)
9537 if self.op.wait_for_sync:
9538 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9540 self.proc.LogWarning("Disk sync-ing has not returned a good"
9541 " status; please check the instance")
9542 if not instance.admin_up:
9543 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9544 elif not instance.admin_up:
9545 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9546 " not supposed to be running because no wait for"
9547 " sync mode was requested")
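# Illustrative usage: growing disk 0 of an instance by 1 GiB corresponds to
# an OpInstanceGrowDisk opcode with disk=0 and amount=1024 (assumed to be in
# MiB), typically issued via something like
#   gnt-instance grow-disk inst1.example.com 0 1G
# where the instance name and size are hypothetical examples.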
9550 class LUInstanceQueryData(NoHooksLU):
9551 """Query runtime instance data.
9556 def ExpandNames(self):
9557 self.needed_locks = {}
9559 # Use locking if requested or when non-static information is wanted
9560 if not (self.op.static or self.op.use_locking):
9561 self.LogWarning("Non-static data requested, locks need to be acquired")
9562 self.op.use_locking = True
9564 if self.op.instances or not self.op.use_locking:
9565 # Expand instance names right here
9566 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9568 # Will use acquired locks
9569 self.wanted_names = None
9571 if self.op.use_locking:
9572 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9574 if self.wanted_names is None:
9575 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9577 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9579 self.needed_locks[locking.LEVEL_NODE] = []
9580 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9581 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
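# In short: static queries that do not explicitly request locking take no
# locks at all; otherwise instance (and, through LOCKS_REPLACE, node) locks
# are acquired in shared mode, either for the explicitly named instances or
# for ALL_SET when no names were given.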
9583 def DeclareLocks(self, level):
9584 if self.op.use_locking and level == locking.LEVEL_NODE:
9585 self._LockInstancesNodes()
9587 def CheckPrereq(self):
9588 """Check prerequisites.
9590 This only checks the optional instance list against the existing names.
9593 if self.wanted_names is None:
9594 assert self.op.use_locking, "Locking was not used"
9595 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9597 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9598 for name in self.wanted_names]
9600 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9601 """Returns the status of a block device
9604 if self.op.static or not node:
9607 self.cfg.SetDiskID(dev, node)
9609 result = self.rpc.call_blockdev_find(node, dev)
9613 result.Raise("Can't compute disk status for %s" % instance_name)
9615 status = result.payload
9619 return (status.dev_path, status.major, status.minor,
9620 status.sync_percent, status.estimated_time,
9621 status.is_degraded, status.ldisk_status)
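# The tuple above is (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status); e.g. a healthy, fully synced device could be
# reported roughly as ("/dev/drbd0", 147, 0, None, None, False, <ok status>)
# -- the concrete values here are purely illustrative.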
9623 def _ComputeDiskStatus(self, instance, snode, dev):
9624 """Compute block device status.
9627 if dev.dev_type in constants.LDS_DRBD:
9628 # we change the snode then (otherwise we use the one passed in)
9629 if dev.logical_id[0] == instance.primary_node:
9630 snode = dev.logical_id[1]
9632 snode = dev.logical_id[0]
9634 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9636 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9639 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9640 for child in dev.children]
9645 "iv_name": dev.iv_name,
9646 "dev_type": dev.dev_type,
9647 "logical_id": dev.logical_id,
9648 "physical_id": dev.physical_id,
9649 "pstatus": dev_pstatus,
9650 "sstatus": dev_sstatus,
9651 "children": dev_children,
9656 def Exec(self, feedback_fn):
9657 """Gather and return data"""
9660 cluster = self.cfg.GetClusterInfo()
9662 for instance in self.wanted_instances:
9663 if not self.op.static:
9664 remote_info = self.rpc.call_instance_info(instance.primary_node,
9666 instance.hypervisor)
9667 remote_info.Raise("Error checking node %s" % instance.primary_node)
9668 remote_info = remote_info.payload
9669 if remote_info and "state" in remote_info:
9672 remote_state = "down"
9675 if instance.admin_up:
9678 config_state = "down"
9680 disks = [self._ComputeDiskStatus(instance, None, device)
9681 for device in instance.disks]
9683 result[instance.name] = {
9684 "name": instance.name,
9685 "config_state": config_state,
9686 "run_state": remote_state,
9687 "pnode": instance.primary_node,
9688 "snodes": instance.secondary_nodes,
9690 # this happens to be the same format used for hooks
9691 "nics": _NICListToTuple(self, instance.nics),
9692 "disk_template": instance.disk_template,
9694 "hypervisor": instance.hypervisor,
9695 "network_port": instance.network_port,
9696 "hv_instance": instance.hvparams,
9697 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9698 "be_instance": instance.beparams,
9699 "be_actual": cluster.FillBE(instance),
9700 "os_instance": instance.osparams,
9701 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9702 "serial_no": instance.serial_no,
9703 "mtime": instance.mtime,
9704 "ctime": instance.ctime,
9705 "uuid": instance.uuid,
9711 class LUInstanceSetParams(LogicalUnit):
9712 """Modifies an instance's parameters.
9715 HPATH = "instance-modify"
9716 HTYPE = constants.HTYPE_INSTANCE
9719 def CheckArguments(self):
9720 if not (self.op.nics or self.op.disks or self.op.disk_template or
9721 self.op.hvparams or self.op.beparams or self.op.os_name):
9722 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9724 if self.op.hvparams:
9725 _CheckGlobalHvParams(self.op.hvparams)
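# self.op.disks is a list of (operation, parameters) pairs; illustrative
# entries (the values are hypothetical, not defaults):
#   (constants.DDM_ADD, {constants.IDISK_SIZE: 1024})    # add a 1 GiB disk
#   (constants.DDM_REMOVE, {})                           # drop the last disk
#   (0, {constants.IDISK_MODE: constants.DISK_RDWR})     # modify disk 0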
9729 for disk_op, disk_dict in self.op.disks:
9730 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9731 if disk_op == constants.DDM_REMOVE:
9734 elif disk_op == constants.DDM_ADD:
9737 if not isinstance(disk_op, int):
9738 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9739 if not isinstance(disk_dict, dict):
9740 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9741 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9743 if disk_op == constants.DDM_ADD:
9744 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9745 if mode not in constants.DISK_ACCESS_SET:
9746 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9748 size = disk_dict.get(constants.IDISK_SIZE, None)
9750 raise errors.OpPrereqError("Required disk parameter size missing",
9754 except (TypeError, ValueError), err:
9755 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9756 str(err), errors.ECODE_INVAL)
9757 disk_dict[constants.IDISK_SIZE] = size
9759 # modification of disk
9760 if constants.IDISK_SIZE in disk_dict:
9761 raise errors.OpPrereqError("Disk size change not possible, use"
9762 " grow-disk", errors.ECODE_INVAL)
9764 if disk_addremove > 1:
9765 raise errors.OpPrereqError("Only one disk add or remove operation"
9766 " supported at a time", errors.ECODE_INVAL)
9768 if self.op.disks and self.op.disk_template is not None:
9769 raise errors.OpPrereqError("Disk template conversion and other disk"
9770 " changes not supported at the same time",
9773 if (self.op.disk_template and
9774 self.op.disk_template in constants.DTS_INT_MIRROR and
9775 self.op.remote_node is None):
9776 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9777 " one requires specifying a secondary node",
9782 for nic_op, nic_dict in self.op.nics:
9783 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9784 if nic_op == constants.DDM_REMOVE:
9787 elif nic_op == constants.DDM_ADD:
9790 if not isinstance(nic_op, int):
9791 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9792 if not isinstance(nic_dict, dict):
9793 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9794 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9796 # nic_dict should be a dict
9797 nic_ip = nic_dict.get(constants.INIC_IP, None)
9798 if nic_ip is not None:
9799 if nic_ip.lower() == constants.VALUE_NONE:
9800 nic_dict[constants.INIC_IP] = None
9802 if not netutils.IPAddress.IsValid(nic_ip):
9803 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9806 nic_bridge = nic_dict.get('bridge', None)
9807 nic_link = nic_dict.get(constants.INIC_LINK, None)
9808 if nic_bridge and nic_link:
9809 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9810 " at the same time", errors.ECODE_INVAL)
9811 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9812 nic_dict['bridge'] = None
9813 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9814 nic_dict[constants.INIC_LINK] = None
9816 if nic_op == constants.DDM_ADD:
9817 nic_mac = nic_dict.get(constants.INIC_MAC, None)
9819 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9821 if constants.INIC_MAC in nic_dict:
9822 nic_mac = nic_dict[constants.INIC_MAC]
9823 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9824 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9826 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9827 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9828 " modifying an existing nic",
9831 if nic_addremove > 1:
9832 raise errors.OpPrereqError("Only one NIC add or remove operation"
9833 " supported at a time", errors.ECODE_INVAL)
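# For reference, the NIC modifications validated above use the same
# (operation, parameters) shape as the disk list; illustrative (made-up)
# entries:
#   (constants.DDM_ADD, {constants.INIC_MAC: constants.VALUE_AUTO,
#                        constants.INIC_IP: "192.0.2.10"})
#   (1, {constants.INIC_LINK: "br0"})   # re-link NIC 1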
9835 def ExpandNames(self):
9836 self._ExpandAndLockInstance()
9837 self.needed_locks[locking.LEVEL_NODE] = []
9838 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9840 def DeclareLocks(self, level):
9841 if level == locking.LEVEL_NODE:
9842 self._LockInstancesNodes()
9843 if self.op.disk_template and self.op.remote_node:
9844 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9845 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9847 def BuildHooksEnv(self):
9850 This runs on the master, primary and secondaries.
9854 if constants.BE_MEMORY in self.be_new:
9855 args['memory'] = self.be_new[constants.BE_MEMORY]
9856 if constants.BE_VCPUS in self.be_new:
9857 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9858 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9859 # information at all.
9862 nic_override = dict(self.op.nics)
9863 for idx, nic in enumerate(self.instance.nics):
9864 if idx in nic_override:
9865 this_nic_override = nic_override[idx]
9867 this_nic_override = {}
9868 if constants.INIC_IP in this_nic_override:
9869 ip = this_nic_override[constants.INIC_IP]
9872 if constants.INIC_MAC in this_nic_override:
9873 mac = this_nic_override[constants.INIC_MAC]
9876 if idx in self.nic_pnew:
9877 nicparams = self.nic_pnew[idx]
9879 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9880 mode = nicparams[constants.NIC_MODE]
9881 link = nicparams[constants.NIC_LINK]
9882 args['nics'].append((ip, mac, mode, link))
9883 if constants.DDM_ADD in nic_override:
9884 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9885 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9886 nicparams = self.nic_pnew[constants.DDM_ADD]
9887 mode = nicparams[constants.NIC_MODE]
9888 link = nicparams[constants.NIC_LINK]
9889 args['nics'].append((ip, mac, mode, link))
9890 elif constants.DDM_REMOVE in nic_override:
9891 del args['nics'][-1]
9893 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9894 if self.op.disk_template:
9895 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9899 def BuildHooksNodes(self):
9900 """Build hooks nodes.
9903 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9906 def CheckPrereq(self):
9907 """Check prerequisites.
9909 This only checks the instance list against the existing names.
9912 # checking the new params on the primary/secondary nodes
9914 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9915 cluster = self.cluster = self.cfg.GetClusterInfo()
9916 assert self.instance is not None, \
9917 "Cannot retrieve locked instance %s" % self.op.instance_name
9918 pnode = instance.primary_node
9919 nodelist = list(instance.all_nodes)
9922 if self.op.os_name and not self.op.force:
9923 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9924 self.op.force_variant)
9925 instance_os = self.op.os_name
9927 instance_os = instance.os
9929 if self.op.disk_template:
9930 if instance.disk_template == self.op.disk_template:
9931 raise errors.OpPrereqError("Instance already has disk template %s" %
9932 instance.disk_template, errors.ECODE_INVAL)
9934 if (instance.disk_template,
9935 self.op.disk_template) not in self._DISK_CONVERSIONS:
9936 raise errors.OpPrereqError("Unsupported disk template conversion from"
9937 " %s to %s" % (instance.disk_template,
9938 self.op.disk_template),
9940 _CheckInstanceDown(self, instance, "cannot change disk template")
9941 if self.op.disk_template in constants.DTS_INT_MIRROR:
9942 if self.op.remote_node == pnode:
9943 raise errors.OpPrereqError("Given new secondary node %s is the same"
9944 " as the primary node of the instance" %
9945 self.op.remote_node, errors.ECODE_STATE)
9946 _CheckNodeOnline(self, self.op.remote_node)
9947 _CheckNodeNotDrained(self, self.op.remote_node)
9948 # FIXME: here we assume that the old instance type is DT_PLAIN
9949 assert instance.disk_template == constants.DT_PLAIN
9950 disks = [{constants.IDISK_SIZE: d.size,
9951 constants.IDISK_VG: d.logical_id[0]}
9952 for d in instance.disks]
9953 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9954 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9956 # hvparams processing
9957 if self.op.hvparams:
9958 hv_type = instance.hypervisor
9959 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9960 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9961 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9964 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9965 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9966 self.hv_new = hv_new # the new actual values
9967 self.hv_inst = i_hvdict # the new dict (without defaults)
9969 self.hv_new = self.hv_inst = {}
9971 # beparams processing
9972 if self.op.beparams:
9973 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9975 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9976 be_new = cluster.SimpleFillBE(i_bedict)
9977 self.be_new = be_new # the new actual values
9978 self.be_inst = i_bedict # the new dict (without defaults)
9980 self.be_new = self.be_inst = {}
9982 # osparams processing
9983 if self.op.osparams:
9984 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9985 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9986 self.os_inst = i_osdict # the new dict (without defaults)
9992 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9993 mem_check_list = [pnode]
9994 if be_new[constants.BE_AUTO_BALANCE]:
9995 # either we changed auto_balance to yes or it was already set before
9996 mem_check_list.extend(instance.secondary_nodes)
9997 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9998 instance.hypervisor)
9999 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10000 instance.hypervisor)
10001 pninfo = nodeinfo[pnode]
10002 msg = pninfo.fail_msg
10004 # Assume the primary node is unreachable and go ahead
10005 self.warn.append("Can't get info from primary node %s: %s" %
10007 elif not isinstance(pninfo.payload.get('memory_free', None), int):
10008 self.warn.append("Node data from primary node %s doesn't contain"
10009 " free memory information" % pnode)
10010 elif instance_info.fail_msg:
10011 self.warn.append("Can't get instance runtime information: %s" %
10012 instance_info.fail_msg)
10014 if instance_info.payload:
10015 current_mem = int(instance_info.payload['memory'])
10017 # Assume instance not running
10018 # (there is a slight race condition here, but it's not very probable,
10019 # and we have no other way to check)
10021 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10022 pninfo.payload['memory_free'])
10024 raise errors.OpPrereqError("This change will prevent the instance"
10025 " from starting, due to %d MB of memory"
10026 " missing on its primary node" % miss_mem,
10027 errors.ECODE_NORES)
10029 if be_new[constants.BE_AUTO_BALANCE]:
10030 for node, nres in nodeinfo.items():
10031 if node not in instance.secondary_nodes:
10033 msg = nres.fail_msg
10035 self.warn.append("Can't get info from secondary node %s: %s" %
10037 elif not isinstance(nres.payload.get('memory_free', None), int):
10038 self.warn.append("Secondary node %s didn't return free"
10039 " memory information" % node)
10040 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10041 self.warn.append("Not enough memory to failover instance to"
10042 " secondary node %s" % node)
10046 self.nic_pinst = {}
10047 for nic_op, nic_dict in self.op.nics:
10048 if nic_op == constants.DDM_REMOVE:
10049 if not instance.nics:
10050 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10051 errors.ECODE_INVAL)
10053 if nic_op != constants.DDM_ADD:
10055 if not instance.nics:
10056 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10057 " no NICs" % nic_op,
10058 errors.ECODE_INVAL)
10059 if nic_op < 0 or nic_op >= len(instance.nics):
10060 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10062 (nic_op, len(instance.nics) - 1),
10063 errors.ECODE_INVAL)
10064 old_nic_params = instance.nics[nic_op].nicparams
10065 old_nic_ip = instance.nics[nic_op].ip
10067 old_nic_params = {}
10070 update_params_dict = dict([(key, nic_dict[key])
10071 for key in constants.NICS_PARAMETERS
10072 if key in nic_dict])
10074 if 'bridge' in nic_dict:
10075 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10077 new_nic_params = _GetUpdatedParams(old_nic_params,
10078 update_params_dict)
10079 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10080 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10081 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10082 self.nic_pinst[nic_op] = new_nic_params
10083 self.nic_pnew[nic_op] = new_filled_nic_params
10084 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10086 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10087 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10088 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10090 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10092 self.warn.append(msg)
10094 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10095 if new_nic_mode == constants.NIC_MODE_ROUTED:
10096 if constants.INIC_IP in nic_dict:
10097 nic_ip = nic_dict[constants.INIC_IP]
10099 nic_ip = old_nic_ip
10101 raise errors.OpPrereqError('Cannot set the nic ip to None'
10102 ' on a routed nic', errors.ECODE_INVAL)
10103 if constants.INIC_MAC in nic_dict:
10104 nic_mac = nic_dict[constants.INIC_MAC]
10105 if nic_mac is None:
10106 raise errors.OpPrereqError('Cannot set the nic mac to None',
10107 errors.ECODE_INVAL)
10108 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10109 # otherwise generate the mac
10110 nic_dict[constants.INIC_MAC] = \
10111 self.cfg.GenerateMAC(self.proc.GetECId())
10113 # or validate/reserve the current one
10115 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10116 except errors.ReservationError:
10117 raise errors.OpPrereqError("MAC address %s already in use"
10118 " in cluster" % nic_mac,
10119 errors.ECODE_NOTUNIQUE)
10122 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10123 raise errors.OpPrereqError("Disk operations not supported for"
10124 " diskless instances",
10125 errors.ECODE_INVAL)
10126 for disk_op, _ in self.op.disks:
10127 if disk_op == constants.DDM_REMOVE:
10128 if len(instance.disks) == 1:
10129 raise errors.OpPrereqError("Cannot remove the last disk of"
10130 " an instance", errors.ECODE_INVAL)
10131 _CheckInstanceDown(self, instance, "cannot remove disks")
10133 if (disk_op == constants.DDM_ADD and
10134 len(instance.disks) >= constants.MAX_DISKS):
10135 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10136 " add more" % constants.MAX_DISKS,
10137 errors.ECODE_STATE)
10138 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10140 if disk_op < 0 or disk_op >= len(instance.disks):
10141 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10143 (disk_op, len(instance.disks)),
10144 errors.ECODE_INVAL)
10148 def _ConvertPlainToDrbd(self, feedback_fn):
10149 """Converts an instance from plain to drbd.
10152 feedback_fn("Converting template to drbd")
10153 instance = self.instance
10154 pnode = instance.primary_node
10155 snode = self.op.remote_node
10157 # create a fake disk info for _GenerateDiskTemplate
10158 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10159 constants.IDISK_VG: d.logical_id[0]}
10160 for d in instance.disks]
10161 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10162 instance.name, pnode, [snode],
10163 disk_info, None, None, 0, feedback_fn)
10164 info = _GetInstanceInfoText(instance)
10165 feedback_fn("Creating additional volumes...")
10166 # first, create the missing data and meta devices
10167 for disk in new_disks:
10168 # unfortunately this is... not too nice
10169 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10171 for child in disk.children:
10172 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10173 # at this stage, all new LVs have been created, we can rename the old ones
10175 feedback_fn("Renaming original volumes...")
10176 rename_list = [(o, n.children[0].logical_id)
10177 for (o, n) in zip(instance.disks, new_disks)]
10178 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10179 result.Raise("Failed to rename original LVs")
10181 feedback_fn("Initializing DRBD devices...")
10182 # all child devices are in place, we can now create the DRBD devices
10183 for disk in new_disks:
10184 for node in [pnode, snode]:
10185 f_create = node == pnode
10186 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10188 # at this point, the instance has been modified
10189 instance.disk_template = constants.DT_DRBD8
10190 instance.disks = new_disks
10191 self.cfg.Update(instance, feedback_fn)
10193 # disks are created, waiting for sync
10194 disk_abort = not _WaitForSync(self, instance)
10196 raise errors.OpExecError("There are some degraded disks for"
10197 " this instance, please cleanup manually")
10199 def _ConvertDrbdToPlain(self, feedback_fn):
10200 """Converts an instance from drbd to plain.
10203 instance = self.instance
10204 assert len(instance.secondary_nodes) == 1
10205 pnode = instance.primary_node
10206 snode = instance.secondary_nodes[0]
10207 feedback_fn("Converting template to plain")
10209 old_disks = instance.disks
10210 new_disks = [d.children[0] for d in old_disks]
10212 # copy over size and mode
10213 for parent, child in zip(old_disks, new_disks):
10214 child.size = parent.size
10215 child.mode = parent.mode
10217 # update instance structure
10218 instance.disks = new_disks
10219 instance.disk_template = constants.DT_PLAIN
10220 self.cfg.Update(instance, feedback_fn)
10222 feedback_fn("Removing volumes on the secondary node...")
10223 for disk in old_disks:
10224 self.cfg.SetDiskID(disk, snode)
10225 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10227 self.LogWarning("Could not remove block device %s on node %s,"
10228 " continuing anyway: %s", disk.iv_name, snode, msg)
10230 feedback_fn("Removing unneeded volumes on the primary node...")
10231 for idx, disk in enumerate(old_disks):
10232 meta = disk.children[1]
10233 self.cfg.SetDiskID(meta, pnode)
10234 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10236 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10237 " continuing anyway: %s", idx, pnode, msg)
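# Both converters above are looked up through the _DISK_CONVERSIONS map
# keyed by (old_template, new_template); from the command line this is
# typically driven by something like
#   gnt-instance modify -t drbd -n node2.example.com inst1.example.com
# (the node and instance names are illustrative).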
10239 def Exec(self, feedback_fn):
10240 """Modifies an instance.
10242 All parameters take effect only at the next restart of the instance.
10245 # Process here the warnings from CheckPrereq, as we don't have a
10246 # feedback_fn there.
10247 for warn in self.warn:
10248 feedback_fn("WARNING: %s" % warn)
10251 instance = self.instance
10253 for disk_op, disk_dict in self.op.disks:
10254 if disk_op == constants.DDM_REMOVE:
10255 # remove the last disk
10256 device = instance.disks.pop()
10257 device_idx = len(instance.disks)
10258 for node, disk in device.ComputeNodeTree(instance.primary_node):
10259 self.cfg.SetDiskID(disk, node)
10260 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10262 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10263 " continuing anyway", device_idx, node, msg)
10264 result.append(("disk/%d" % device_idx, "remove"))
10265 elif disk_op == constants.DDM_ADD:
10267 if instance.disk_template in (constants.DT_FILE,
10268 constants.DT_SHARED_FILE):
10269 file_driver, file_path = instance.disks[0].logical_id
10270 file_path = os.path.dirname(file_path)
10272 file_driver = file_path = None
10273 disk_idx_base = len(instance.disks)
10274 new_disk = _GenerateDiskTemplate(self,
10275 instance.disk_template,
10276 instance.name, instance.primary_node,
10277 instance.secondary_nodes,
10281 disk_idx_base, feedback_fn)[0]
10282 instance.disks.append(new_disk)
10283 info = _GetInstanceInfoText(instance)
10285 logging.info("Creating volume %s for instance %s",
10286 new_disk.iv_name, instance.name)
10287 # Note: this needs to be kept in sync with _CreateDisks
10289 for node in instance.all_nodes:
10290 f_create = node == instance.primary_node
10292 _CreateBlockDev(self, node, instance, new_disk,
10293 f_create, info, f_create)
10294 except errors.OpExecError, err:
10295 self.LogWarning("Failed to create volume %s (%s) on"
10297 new_disk.iv_name, new_disk, node, err)
10298 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10299 (new_disk.size, new_disk.mode)))
10301 # change a given disk
10302 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10303 result.append(("disk.mode/%d" % disk_op,
10304 disk_dict[constants.IDISK_MODE]))
10306 if self.op.disk_template:
10307 r_shut = _ShutdownInstanceDisks(self, instance)
10309 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10310 " proceed with disk template conversion")
10311 mode = (instance.disk_template, self.op.disk_template)
10313 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10315 self.cfg.ReleaseDRBDMinors(instance.name)
10317 result.append(("disk_template", self.op.disk_template))
10320 for nic_op, nic_dict in self.op.nics:
10321 if nic_op == constants.DDM_REMOVE:
10322 # remove the last nic
10323 del instance.nics[-1]
10324 result.append(("nic.%d" % len(instance.nics), "remove"))
10325 elif nic_op == constants.DDM_ADD:
10326 # mac and bridge should be set by now
10327 mac = nic_dict[constants.INIC_MAC]
10328 ip = nic_dict.get(constants.INIC_IP, None)
10329 nicparams = self.nic_pinst[constants.DDM_ADD]
10330 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10331 instance.nics.append(new_nic)
10332 result.append(("nic.%d" % (len(instance.nics) - 1),
10333 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10334 (new_nic.mac, new_nic.ip,
10335 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10336 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10339 for key in (constants.INIC_MAC, constants.INIC_IP):
10340 if key in nic_dict:
10341 setattr(instance.nics[nic_op], key, nic_dict[key])
10342 if nic_op in self.nic_pinst:
10343 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10344 for key, val in nic_dict.iteritems():
10345 result.append(("nic.%s/%d" % (key, nic_op), val))
10348 if self.op.hvparams:
10349 instance.hvparams = self.hv_inst
10350 for key, val in self.op.hvparams.iteritems():
10351 result.append(("hv/%s" % key, val))
10354 if self.op.beparams:
10355 instance.beparams = self.be_inst
10356 for key, val in self.op.beparams.iteritems():
10357 result.append(("be/%s" % key, val))
10360 if self.op.os_name:
10361 instance.os = self.op.os_name
10364 if self.op.osparams:
10365 instance.osparams = self.os_inst
10366 for key, val in self.op.osparams.iteritems():
10367 result.append(("os/%s" % key, val))
10369 self.cfg.Update(instance, feedback_fn)
10373 _DISK_CONVERSIONS = {
10374 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10375 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10379 class LUBackupQuery(NoHooksLU):
10380 """Query the exports list
10385 def ExpandNames(self):
10386 self.needed_locks = {}
10387 self.share_locks[locking.LEVEL_NODE] = 1
10388 if not self.op.nodes:
10389 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10391 self.needed_locks[locking.LEVEL_NODE] = \
10392 _GetWantedNodes(self, self.op.nodes)
10394 def Exec(self, feedback_fn):
10395 """Compute the list of all the exported system images.
10398 @return: a dictionary with the structure node->(export-list)
10399 where export-list is a list of the instances exported on that node
10403 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10404 rpcresult = self.rpc.call_export_list(self.nodes)
10406 for node in rpcresult:
10407 if rpcresult[node].fail_msg:
10408 result[node] = False
10410 result[node] = rpcresult[node].payload
10415 class LUBackupPrepare(NoHooksLU):
10416 """Prepares an instance for an export and returns useful information.
10421 def ExpandNames(self):
10422 self._ExpandAndLockInstance()
10424 def CheckPrereq(self):
10425 """Check prerequisites.
10428 instance_name = self.op.instance_name
10430 self.instance = self.cfg.GetInstanceInfo(instance_name)
10431 assert self.instance is not None, \
10432 "Cannot retrieve locked instance %s" % self.op.instance_name
10433 _CheckNodeOnline(self, self.instance.primary_node)
10435 self._cds = _GetClusterDomainSecret()
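# For remote exports, the dictionary returned by Exec below (handshake,
# signed X509 key name and signed CA) is presumably what the caller later
# hands back to the export opcode as x509_key_name / destination_x509_ca;
# see LUBackupExport.CheckArguments further down.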
10437 def Exec(self, feedback_fn):
10438 """Prepares an instance for an export.
10441 instance = self.instance
10443 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10444 salt = utils.GenerateSecret(8)
10446 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10447 result = self.rpc.call_x509_cert_create(instance.primary_node,
10448 constants.RIE_CERT_VALIDITY)
10449 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10451 (name, cert_pem) = result.payload
10453 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10457 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10458 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10460 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10466 class LUBackupExport(LogicalUnit):
10467 """Export an instance to an image in the cluster.
10470 HPATH = "instance-export"
10471 HTYPE = constants.HTYPE_INSTANCE
10474 def CheckArguments(self):
10475 """Check the arguments.
10478 self.x509_key_name = self.op.x509_key_name
10479 self.dest_x509_ca_pem = self.op.destination_x509_ca
10481 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10482 if not self.x509_key_name:
10483 raise errors.OpPrereqError("Missing X509 key name for encryption",
10484 errors.ECODE_INVAL)
10486 if not self.dest_x509_ca_pem:
10487 raise errors.OpPrereqError("Missing destination X509 CA",
10488 errors.ECODE_INVAL)
10490 def ExpandNames(self):
10491 self._ExpandAndLockInstance()
10493 # Lock all nodes for local exports
10494 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10495 # FIXME: lock only instance primary and destination node
10497 # Sad but true, for now we have to lock all nodes, as we don't know where
10498 # the previous export might be, and in this LU we search for it and
10499 # remove it from its current node. In the future we could fix this by:
10500 # - making a tasklet to search (share-lock all), then create the
10501 # new one, then one to remove, after
10502 # - removing the removal operation altogether
10503 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10505 def DeclareLocks(self, level):
10506 """Last minute lock declaration."""
10507 # All nodes are locked anyway, so nothing to do here.
10509 def BuildHooksEnv(self):
10510 """Build hooks env.
10512 This will run on the master, primary node and target node.
10516 "EXPORT_MODE": self.op.mode,
10517 "EXPORT_NODE": self.op.target_node,
10518 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10519 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10520 # TODO: Generic function for boolean env variables
10521 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10524 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10528 def BuildHooksNodes(self):
10529 """Build hooks nodes.
10532 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10534 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10535 nl.append(self.op.target_node)
10539 def CheckPrereq(self):
10540 """Check prerequisites.
10542 This checks that the instance and node names are valid.
10545 instance_name = self.op.instance_name
10547 self.instance = self.cfg.GetInstanceInfo(instance_name)
10548 assert self.instance is not None, \
10549 "Cannot retrieve locked instance %s" % self.op.instance_name
10550 _CheckNodeOnline(self, self.instance.primary_node)
10552 if (self.op.remove_instance and self.instance.admin_up and
10553 not self.op.shutdown):
10554 raise errors.OpPrereqError("Can not remove instance without shutting it"
10557 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10558 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10559 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10560 assert self.dst_node is not None
10562 _CheckNodeOnline(self, self.dst_node.name)
10563 _CheckNodeNotDrained(self, self.dst_node.name)
10566 self.dest_disk_info = None
10567 self.dest_x509_ca = None
10569 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10570 self.dst_node = None
10572 if len(self.op.target_node) != len(self.instance.disks):
10573 raise errors.OpPrereqError(("Received destination information for %s"
10574 " disks, but instance %s has %s disks") %
10575 (len(self.op.target_node), instance_name,
10576 len(self.instance.disks)),
10577 errors.ECODE_INVAL)
10579 cds = _GetClusterDomainSecret()
10581 # Check X509 key name
10583 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10584 except (TypeError, ValueError), err:
10585 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10587 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10588 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10589 errors.ECODE_INVAL)
10591 # Load and verify CA
10593 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10594 except OpenSSL.crypto.Error, err:
10595 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10596 (err, ), errors.ECODE_INVAL)
10598 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10599 if errcode is not None:
10600 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10601 (msg, ), errors.ECODE_INVAL)
10603 self.dest_x509_ca = cert
10605 # Verify target information
10607 for idx, disk_data in enumerate(self.op.target_node):
10609 (host, port, magic) = \
10610 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10611 except errors.GenericError, err:
10612 raise errors.OpPrereqError("Target info for disk %s: %s" %
10613 (idx, err), errors.ECODE_INVAL)
10615 disk_info.append((host, port, magic))
10617 assert len(disk_info) == len(self.op.target_node)
10618 self.dest_disk_info = disk_info
10621 raise errors.ProgrammerError("Unhandled export mode %r" %
10624 # instance disk type verification
10625 # TODO: Implement export support for file-based disks
10626 for disk in self.instance.disks:
10627 if disk.dev_type == constants.LD_FILE:
10628 raise errors.OpPrereqError("Export not supported for instances with"
10629 " file-based disks", errors.ECODE_INVAL)
10631 def _CleanupExports(self, feedback_fn):
10632 """Removes exports of current instance from all other nodes.
10634 If an instance in a cluster with nodes A..D was exported to node C, its
10635 exports will be removed from the nodes A, B and D.
10638 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10640 nodelist = self.cfg.GetNodeList()
10641 nodelist.remove(self.dst_node.name)
10643 # on one-node clusters nodelist will be empty after the removal
10644 # if we proceed, the backup would be removed because OpBackupQuery
10645 # substitutes an empty list with the full cluster node list.
10646 iname = self.instance.name
10648 feedback_fn("Removing old exports for instance %s" % iname)
10649 exportlist = self.rpc.call_export_list(nodelist)
10650 for node in exportlist:
10651 if exportlist[node].fail_msg:
10653 if iname in exportlist[node].payload:
10654 msg = self.rpc.call_export_remove(node, iname).fail_msg
10656 self.LogWarning("Could not remove older export for instance %s"
10657 " on node %s: %s", iname, node, msg)
10659 def Exec(self, feedback_fn):
10660 """Export an instance to an image in the cluster.
10663 assert self.op.mode in constants.EXPORT_MODES
10665 instance = self.instance
10666 src_node = instance.primary_node
10668 if self.op.shutdown:
10669 # shutdown the instance, but not the disks
10670 feedback_fn("Shutting down instance %s" % instance.name)
10671 result = self.rpc.call_instance_shutdown(src_node, instance,
10672 self.op.shutdown_timeout)
10673 # TODO: Maybe ignore failures if ignore_remove_failures is set
10674 result.Raise("Could not shutdown instance %s on"
10675 " node %s" % (instance.name, src_node))
10677 # set the disks ID correctly since call_instance_start needs the
10678 # correct drbd minor to create the symlinks
10679 for disk in instance.disks:
10680 self.cfg.SetDiskID(disk, src_node)
10682 activate_disks = (not instance.admin_up)
10685 # Activate the instance disks if we're exporting a stopped instance
10686 feedback_fn("Activating disks for %s" % instance.name)
10687 _StartInstanceDisks(self, instance, None)
10690 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10693 helper.CreateSnapshots()
10695 if (self.op.shutdown and instance.admin_up and
10696 not self.op.remove_instance):
10697 assert not activate_disks
10698 feedback_fn("Starting instance %s" % instance.name)
10699 result = self.rpc.call_instance_start(src_node, instance, None, None)
10700 msg = result.fail_msg
10702 feedback_fn("Failed to start instance: %s" % msg)
10703 _ShutdownInstanceDisks(self, instance)
10704 raise errors.OpExecError("Could not start instance: %s" % msg)
10706 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10707 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10708 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10709 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10710 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10712 (key_name, _, _) = self.x509_key_name
10715 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10718 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10719 key_name, dest_ca_pem,
10724 # Check for backwards compatibility
10725 assert len(dresults) == len(instance.disks)
10726 assert compat.all(isinstance(i, bool) for i in dresults), \
10727 "Not all results are boolean: %r" % dresults
10731 feedback_fn("Deactivating disks for %s" % instance.name)
10732 _ShutdownInstanceDisks(self, instance)
10734 if not (compat.all(dresults) and fin_resu):
10737 failures.append("export finalization")
10738 if not compat.all(dresults):
10739 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10741 failures.append("disk export: disk(s) %s" % fdsk)
10743 raise errors.OpExecError("Export failed, errors in %s" %
10744 utils.CommaJoin(failures))
10746 # At this point, the export was successful, we can cleanup/finish
10748 # Remove instance if requested
10749 if self.op.remove_instance:
10750 feedback_fn("Removing instance %s" % instance.name)
10751 _RemoveInstance(self, feedback_fn, instance,
10752 self.op.ignore_remove_failures)
10754 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10755 self._CleanupExports(feedback_fn)
10757 return fin_resu, dresults
10760 class LUBackupRemove(NoHooksLU):
10761 """Remove exports related to the named instance.
10766 def ExpandNames(self):
10767 self.needed_locks = {}
10768 # We need all nodes to be locked in order for RemoveExport to work, but we
10769 # don't need to lock the instance itself, as nothing will happen to it (and
10770 # we can also remove exports for a removed instance)
10771 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10773 def Exec(self, feedback_fn):
10774 """Remove any export.
10777 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10778 # If the instance was not found we'll try with the name that was passed in.
10779 # This will only work if it was an FQDN, though.
10781 if not instance_name:
10783 instance_name = self.op.instance_name
10785 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10786 exportlist = self.rpc.call_export_list(locked_nodes)
10788 for node in exportlist:
10789 msg = exportlist[node].fail_msg
10791 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10793 if instance_name in exportlist[node].payload:
10795 result = self.rpc.call_export_remove(node, instance_name)
10796 msg = result.fail_msg
10798 logging.error("Could not remove export for instance %s"
10799 " on node %s: %s", instance_name, node, msg)
10801 if fqdn_warn and not found:
10802 feedback_fn("Export not found. If trying to remove an export belonging"
10803 " to a deleted instance please use its Fully Qualified"
10807 class LUGroupAdd(LogicalUnit):
10808 """Logical unit for creating node groups.
10811 HPATH = "group-add"
10812 HTYPE = constants.HTYPE_GROUP
10815 def ExpandNames(self):
10816 # We need the new group's UUID here so that we can create and acquire the
10817 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10818 # that it should not check whether the UUID exists in the configuration.
10819 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10820 self.needed_locks = {}
10821 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10823 def CheckPrereq(self):
10824 """Check prerequisites.
10826 This checks that the given group name is not an existing node group
10831 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10832 except errors.OpPrereqError:
10835 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10836 " node group (UUID: %s)" %
10837 (self.op.group_name, existing_uuid),
10838 errors.ECODE_EXISTS)
10840 if self.op.ndparams:
10841 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10843 def BuildHooksEnv(self):
10844 """Build hooks env.
10848 "GROUP_NAME": self.op.group_name,
10851 def BuildHooksNodes(self):
10852 """Build hooks nodes.
10855 mn = self.cfg.GetMasterNode()
10856 return ([mn], [mn])
10858 def Exec(self, feedback_fn):
10859 """Add the node group to the cluster.
10862 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10863 uuid=self.group_uuid,
10864 alloc_policy=self.op.alloc_policy,
10865 ndparams=self.op.ndparams)
10867 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10868 del self.remove_locks[locking.LEVEL_NODEGROUP]
10871 class LUGroupAssignNodes(NoHooksLU):
10872 """Logical unit for assigning nodes to groups.
10877 def ExpandNames(self):
10878 # These raise errors.OpPrereqError on their own:
10879 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10880 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10882 # We want to lock all the affected nodes and groups. We have readily
10883 # available the list of nodes, and the *destination* group. To gather the
10884 # list of "source" groups, we need to fetch node information.
10885 self.node_data = self.cfg.GetAllNodesInfo()
10886 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10887 affected_groups.add(self.group_uuid)
10889 self.needed_locks = {
10890 locking.LEVEL_NODEGROUP: list(affected_groups),
10891 locking.LEVEL_NODE: self.op.nodes,
10894 def CheckPrereq(self):
10895 """Check prerequisites.
10898 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10899 instance_data = self.cfg.GetAllInstancesInfo()
10901 if self.group is None:
10902 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10903 (self.op.group_name, self.group_uuid))
10905 (new_splits, previous_splits) = \
10906 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10907 for node in self.op.nodes],
10908 self.node_data, instance_data)
10911 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10913 if not self.op.force:
10914 raise errors.OpExecError("The following instances get split by this"
10915 " change and --force was not given: %s" %
10918 self.LogWarning("This operation will split the following instances: %s",
10921 if previous_splits:
10922 self.LogWarning("In addition, these already-split instances continue"
10923 " to be split across groups: %s",
10924 utils.CommaJoin(utils.NiceSort(previous_splits)))
10926 def Exec(self, feedback_fn):
10927 """Assign nodes to a new group.
10930 for node in self.op.nodes:
10931 self.node_data[node].group = self.group_uuid
10933 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10936 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10937 """Check for split instances after a node assignment.
10939 This method considers a series of node assignments as an atomic operation,
10940 and returns information about split instances after applying the set of
10943 In particular, it returns information about newly split instances, and
10944 instances that were already split, and remain so after the change.
10946 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10949 @type changes: list of (node_name, new_group_uuid) pairs.
10950 @param changes: list of node assignments to consider.
10951 @param node_data: a dict with data for all nodes
10952 @param instance_data: a dict with all instances to consider
10953 @rtype: a two-tuple
10954 @return: a list of instances that were previously okay and are now split as a
10955 consequence of this change, and a list of instances that were previously
10956 split and that this change does not fix.
10959 changed_nodes = dict((node, group) for node, group in changes
10960 if node_data[node].group != group)
10962 all_split_instances = set()
10963 previously_split_instances = set()
10965 def InstanceNodes(instance):
10966 return [instance.primary_node] + list(instance.secondary_nodes)
10968 for inst in instance_data.values():
10969 if inst.disk_template not in constants.DTS_INT_MIRROR:
10972 instance_nodes = InstanceNodes(inst)
10974 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10975 previously_split_instances.add(inst.name)
10977 if len(set(changed_nodes.get(node, node_data[node].group)
10978 for node in instance_nodes)) > 1:
10979 all_split_instances.add(inst.name)
10981 return (list(all_split_instances - previously_split_instances),
10982 list(previously_split_instances & all_split_instances))
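# Worked example (hypothetical): nodes A and B start in group g1, node C in
# g2. A DRBD instance with primary A and secondary B is not split; moving
# only A to g2 in 'changes' makes it newly split, while an instance already
# spanning B and C stays in the "previously split" list.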
10985 class _GroupQuery(_QueryBase):
10986 FIELDS = query.GROUP_FIELDS
10988 def ExpandNames(self, lu):
10989 lu.needed_locks = {}
10991 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10992 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10995 self.wanted = [name_to_uuid[name]
10996 for name in utils.NiceSort(name_to_uuid.keys())]
10998 # Accept the given names as either group names or UUIDs.
11001 all_uuid = frozenset(self._all_groups.keys())
11003 for name in self.names:
11004 if name in all_uuid:
11005 self.wanted.append(name)
11006 elif name in name_to_uuid:
11007 self.wanted.append(name_to_uuid[name])
11009 missing.append(name)
11012 raise errors.OpPrereqError("Some groups do not exist: %s" %
11013 utils.CommaJoin(missing),
11014 errors.ECODE_NOENT)
11016 def DeclareLocks(self, lu, level):
11019 def _GetQueryData(self, lu):
11020 """Computes the list of node groups and their attributes.
11023 do_nodes = query.GQ_NODE in self.requested_data
11024 do_instances = query.GQ_INST in self.requested_data
11026 group_to_nodes = None
11027 group_to_instances = None
11029 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11030 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11031 # latter GetAllInstancesInfo() is not enough, for we have to go through
11032 # instance->node. Hence, we will need to process nodes even if we only need
11033 # instance information.
11034 if do_nodes or do_instances:
11035 all_nodes = lu.cfg.GetAllNodesInfo()
11036 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11039 for node in all_nodes.values():
11040 if node.group in group_to_nodes:
11041 group_to_nodes[node.group].append(node.name)
11042 node_to_group[node.name] = node.group
11045 all_instances = lu.cfg.GetAllInstancesInfo()
11046 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11048 for instance in all_instances.values():
11049 node = instance.primary_node
11050 if node in node_to_group:
11051 group_to_instances[node_to_group[node]].append(instance.name)
11054 # Do not pass on node information if it was not requested.
11055 group_to_nodes = None
11057 return query.GroupQueryData([self._all_groups[uuid]
11058 for uuid in self.wanted],
11059 group_to_nodes, group_to_instances)
11062 class LUGroupQuery(NoHooksLU):
11063 """Logical unit for querying node groups.
11068 def CheckArguments(self):
11069 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11070 self.op.output_fields, False)
11072 def ExpandNames(self):
11073 self.gq.ExpandNames(self)
11075 def Exec(self, feedback_fn):
11076 return self.gq.OldStyleQuery(self)
11079 class LUGroupSetParams(LogicalUnit):
11080 """Modifies the parameters of a node group.
11083 HPATH = "group-modify"
11084 HTYPE = constants.HTYPE_GROUP
11087 def CheckArguments(self):
11090 self.op.alloc_policy,
11093 if all_changes.count(None) == len(all_changes):
11094 raise errors.OpPrereqError("Please pass at least one modification",
11095 errors.ECODE_INVAL)
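# Illustrative only: a modification request might carry e.g. an ndparams
# update such as {constants.ND_OOB_PROGRAM: "/bin/oob"} or a new
# alloc_policy; the values here are made-up examples, not defaults.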
11097 def ExpandNames(self):
11098 # This raises errors.OpPrereqError on its own:
11099 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11101 self.needed_locks = {
11102 locking.LEVEL_NODEGROUP: [self.group_uuid],
11105 def CheckPrereq(self):
11106 """Check prerequisites.
11109 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11111 if self.group is None:
11112 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11113 (self.op.group_name, self.group_uuid))
11115 if self.op.ndparams:
11116 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11117 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11118 self.new_ndparams = new_ndparams
11120 def BuildHooksEnv(self):
11121 """Build hooks env.
11125 "GROUP_NAME": self.op.group_name,
11126 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11129 def BuildHooksNodes(self):
11130 """Build hooks nodes.
11133 mn = self.cfg.GetMasterNode()
11134 return ([mn], [mn])
11136 def Exec(self, feedback_fn):
11137 """Modifies the node group.
11142 if self.op.ndparams:
11143 self.group.ndparams = self.new_ndparams
11144 result.append(("ndparams", str(self.group.ndparams)))
11146 if self.op.alloc_policy:
11147 self.group.alloc_policy = self.op.alloc_policy
11149 self.cfg.Update(self.group, feedback_fn)
11151 return result
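# Minimal sketch of the pattern LUGroupSetParams uses above: require at least
# one optional change, apply the changes to a plain dict and report them as a
# list of (name, new value) pairs. The parameter names are hypothetical.
def _ExampleApplyGroupChanges(group, ndparams=None, alloc_policy=None):
  """Apply optional changes and return the list of modifications made.

  """
  all_changes = [ndparams, alloc_policy]
  if all_changes.count(None) == len(all_changes):
    raise ValueError("Please pass at least one modification")

  result = []
  if ndparams is not None:
    group["ndparams"] = ndparams
    result.append(("ndparams", str(ndparams)))
  if alloc_policy is not None:
    group["alloc_policy"] = alloc_policy
    result.append(("alloc_policy", alloc_policy))
  return result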
11154 class LUGroupRemove(LogicalUnit):
11155 HPATH = "group-remove"
11156 HTYPE = constants.HTYPE_GROUP
11159 def ExpandNames(self):
11160 # This raises errors.OpPrereqError on its own:
11161 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11162 self.needed_locks = {
11163 locking.LEVEL_NODEGROUP: [self.group_uuid],
11164 }
11166 def CheckPrereq(self):
11167 """Check prerequisites.
11169 This checks that the given group name exists as a node group, that it is
11170 empty (i.e., contains no nodes), and that it is not the last group of the
11171 cluster.
11174 # Verify that the group is empty.
11175 group_nodes = [node.name
11176 for node in self.cfg.GetAllNodesInfo().values()
11177 if node.group == self.group_uuid]
11179 if group_nodes:
11180 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11181 " nodes: %s" %
11182 (self.op.group_name,
11183 utils.CommaJoin(utils.NiceSort(group_nodes))),
11184 errors.ECODE_STATE)
11186 # Verify the cluster would not be left group-less.
11187 if len(self.cfg.GetNodeGroupList()) == 1:
11188 raise errors.OpPrereqError("Group '%s' is the only group,"
11189 " cannot be removed" %
11190 self.op.group_name,
11191 errors.ECODE_STATE)
11193 def BuildHooksEnv(self):
11194 """Build hooks env.
11198 "GROUP_NAME": self.op.group_name,
11201 def BuildHooksNodes(self):
11202 """Build hooks nodes.
11205 mn = self.cfg.GetMasterNode()
11206 return ([mn], [mn])
11208 def Exec(self, feedback_fn):
11209 """Remove the node group.
11212 try:
11213 self.cfg.RemoveNodeGroup(self.group_uuid)
11214 except errors.ConfigurationError:
11215 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11216 (self.op.group_name, self.group_uuid))
11218 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
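# Sketch of the prerequisite checks performed by LUGroupRemove above, expressed
# on plain data: the group must contain no nodes and must not be the only group
# left. "node_groups" (node name -> group UUID) and "group_count" are
# hypothetical stand-ins for the cluster configuration.
def _ExampleCheckGroupRemovable(group_uuid, node_groups, group_count):
  """Raise ValueError if a group with the given UUID cannot be removed.

  """
  group_nodes = [name for (name, guuid) in node_groups.items()
                 if guuid == group_uuid]
  if group_nodes:
    raise ValueError("Group not empty, has nodes: %s" %
                     ", ".join(sorted(group_nodes)))
  if group_count == 1:
    raise ValueError("Cannot remove the only remaining group")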
11221 class LUGroupRename(LogicalUnit):
11222 HPATH = "group-rename"
11223 HTYPE = constants.HTYPE_GROUP
11226 def ExpandNames(self):
11227 # This raises errors.OpPrereqError on its own:
11228 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11230 self.needed_locks = {
11231 locking.LEVEL_NODEGROUP: [self.group_uuid],
11232 }
11234 def CheckPrereq(self):
11235 """Check prerequisites.
11237 Ensures requested new name is not yet used.
11240 try:
11241 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11242 except errors.OpPrereqError:
11243 pass
11244 else:
11245 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11246 " node group (UUID: %s)" %
11247 (self.op.new_name, new_name_uuid),
11248 errors.ECODE_EXISTS)
11250 def BuildHooksEnv(self):
11251 """Build hooks env.
11255 "OLD_NAME": self.op.group_name,
11256 "NEW_NAME": self.op.new_name,
11259 def BuildHooksNodes(self):
11260 """Build hooks nodes.
11263 mn = self.cfg.GetMasterNode()
11265 all_nodes = self.cfg.GetAllNodesInfo()
11266 all_nodes.pop(mn, None)
11268 run_nodes = [mn]
11269 run_nodes.extend(node.name for node in all_nodes.values()
11270 if node.group == self.group_uuid)
11272 return (run_nodes, run_nodes)
11274 def Exec(self, feedback_fn):
11275 """Rename the node group.
11278 group = self.cfg.GetNodeGroup(self.group_uuid)
11280 if group is None:
11281 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11282 (self.op.group_name, self.group_uuid))
11284 group.name = self.op.new_name
11285 self.cfg.Update(group, feedback_fn)
11287 return self.op.new_name
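# Sketch of the collision check in LUGroupRename.CheckPrereq above: looking up
# the new name is expected to fail; if it succeeds, the name is already taken.
# A plain dict stands in for the cluster configuration (hypothetical input).
def _ExampleCheckNewGroupName(new_name, existing_groups):
  """Raise ValueError if new_name already maps to a group UUID.

  """
  try:
    clashing_uuid = existing_groups[new_name]
  except KeyError:
    pass
  else:
    raise ValueError("Desired new name '%s' clashes with existing group"
                     " (UUID: %s)" % (new_name, clashing_uuid))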
11290 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11291 """Generic tags LU.
11293 This is an abstract class which is the parent of all the other tags LUs.
11296 def ExpandNames(self):
11297 self.group_uuid = None
11298 self.needed_locks = {}
11299 if self.op.kind == constants.TAG_NODE:
11300 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11301 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11302 elif self.op.kind == constants.TAG_INSTANCE:
11303 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11304 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11305 elif self.op.kind == constants.TAG_NODEGROUP:
11306 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11308 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11309 # not possible to acquire the BGL based on opcode parameters)
11311 def CheckPrereq(self):
11312 """Check prerequisites.
11315 if self.op.kind == constants.TAG_CLUSTER:
11316 self.target = self.cfg.GetClusterInfo()
11317 elif self.op.kind == constants.TAG_NODE:
11318 self.target = self.cfg.GetNodeInfo(self.op.name)
11319 elif self.op.kind == constants.TAG_INSTANCE:
11320 self.target = self.cfg.GetInstanceInfo(self.op.name)
11321 elif self.op.kind == constants.TAG_NODEGROUP:
11322 self.target = self.cfg.GetNodeGroup(self.group_uuid)
11323 else:
11324 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11325 str(self.op.kind), errors.ECODE_INVAL)
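# Sketch of the kind -> target dispatch done in TagsLU.CheckPrereq above, using
# a plain dict of lookup callables instead of the cluster configuration. The
# keys mirror the constants.TAG_* kinds but any strings work for illustration.
def _ExampleResolveTagTarget(kind, name, lookups):
  """Return the object carrying the tags for (kind, name).

  @type lookups: dict
  @param lookups: map from kind to a callable taking the object name

  """
  try:
    fn = lookups[kind]
  except KeyError:
    raise ValueError("Wrong tag type requested (%s)" % kind)
  return fn(name)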
11328 class LUTagsGet(TagsLU):
11329 """Returns the tags of a given object.
11334 def ExpandNames(self):
11335 TagsLU.ExpandNames(self)
11337 # Share locks as this is only a read operation
11338 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11340 def Exec(self, feedback_fn):
11341 """Returns the tag list.
11344 return list(self.target.GetTags())
11347 class LUTagsSearch(NoHooksLU):
11348 """Searches the tags for a given pattern.
11353 def ExpandNames(self):
11354 self.needed_locks = {}
11356 def CheckPrereq(self):
11357 """Check prerequisites.
11359 This checks the pattern passed for validity by compiling it.
11362 try:
11363 self.re = re.compile(self.op.pattern)
11364 except re.error, err:
11365 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11366 (self.op.pattern, err), errors.ECODE_INVAL)
11368 def Exec(self, feedback_fn):
11369 """Returns the tag list.
11372 cfg = self.cfg
11373 tgts = [("/cluster", cfg.GetClusterInfo())]
11374 ilist = cfg.GetAllInstancesInfo().values()
11375 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11376 nlist = cfg.GetAllNodesInfo().values()
11377 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11378 tgts.extend(("/nodegroup/%s" % n.name, n)
11379 for n in cfg.GetAllNodeGroupsInfo().values())
11380 results = []
11381 for path, target in tgts:
11382 for tag in target.GetTags():
11383 if self.re.search(tag):
11384 results.append((path, tag))
11386 return results
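# Sketch of the search loop in LUTagsSearch.Exec above: compile the pattern
# once, then collect (path, tag) pairs for every matching tag. "targets" is a
# hypothetical list of (path, iterable of tags) pairs.
def _ExampleSearchTags(pattern, targets):
  """Return all (path, tag) pairs whose tag matches the pattern.

  """
  regex = re.compile(pattern)
  results = []
  for (path, tags) in targets:
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results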
11388 class LUTagsSet(TagsLU):
11389 """Sets a tag on a given object.
11394 def CheckPrereq(self):
11395 """Check prerequisites.
11397 This checks the type and length of the tag name and value.
11400 TagsLU.CheckPrereq(self)
11401 for tag in self.op.tags:
11402 objects.TaggableObject.ValidateTag(tag)
11404 def Exec(self, feedback_fn):
11405 """Sets the tag.
11408 try:
11409 for tag in self.op.tags:
11410 self.target.AddTag(tag)
11411 except errors.TagError, err:
11412 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11413 self.cfg.Update(self.target, feedback_fn)
11416 class LUTagsDel(TagsLU):
11417 """Delete a list of tags from a given object.
11422 def CheckPrereq(self):
11423 """Check prerequisites.
11425 This checks that we have the given tag.
11428 TagsLU.CheckPrereq(self)
11429 for tag in self.op.tags:
11430 objects.TaggableObject.ValidateTag(tag)
11431 del_tags = frozenset(self.op.tags)
11432 cur_tags = self.target.GetTags()
11434 diff_tags = del_tags - cur_tags
11435 if diff_tags:
11436 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11437 raise errors.OpPrereqError("Tag(s) %s not found" %
11438 (utils.CommaJoin(diff_names), ),
11439 errors.ECODE_NOENT)
11441 def Exec(self, feedback_fn):
11442 """Remove the tag from the object.
11445 for tag in self.op.tags:
11446 self.target.RemoveTag(tag)
11447 self.cfg.Update(self.target, feedback_fn)
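# Sketch of the check in LUTagsDel.CheckPrereq above: every tag requested for
# removal must currently be present, which is a plain set-difference test.
def _ExampleCheckTagsPresent(requested, current):
  """Raise ValueError if any requested tag is missing from the current set.

  """
  diff_tags = frozenset(requested) - frozenset(current)
  if diff_tags:
    diff_names = ("'%s'" % i for i in sorted(diff_tags))
    raise ValueError("Tag(s) %s not found" % ", ".join(diff_names))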
11450 class LUTestDelay(NoHooksLU):
11451 """Sleep for a specified amount of time.
11453 This LU sleeps on the master and/or nodes for a specified amount of
11454 time.
11459 def ExpandNames(self):
11460 """Expand names and set required locks.
11462 This expands the node list, if any.
11465 self.needed_locks = {}
11466 if self.op.on_nodes:
11467 # _GetWantedNodes can be used here, but is not always appropriate to use
11468 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11469 # more information.
11470 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11471 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11473 def _TestDelay(self):
11474 """Do the actual sleep.
11477 if self.op.on_master:
11478 if not utils.TestDelay(self.op.duration):
11479 raise errors.OpExecError("Error during master delay test")
11480 if self.op.on_nodes:
11481 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11482 for node, node_result in result.items():
11483 node_result.Raise("Failure during rpc call to node %s" % node)
11485 def Exec(self, feedback_fn):
11486 """Execute the test delay opcode, with the wanted repetitions.
11489 if self.op.repeat == 0:
11490 self._TestDelay()
11491 else:
11492 top_value = self.op.repeat - 1
11493 for i in range(self.op.repeat):
11494 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11495 self._TestDelay()
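# Sketch of the repetition logic in LUTestDelay.Exec above: run the delay once
# when no repetition was requested, otherwise run it the requested number of
# times while reporting progress. "delay_fn" and "log_fn" are hypothetical.
def _ExampleRepeatDelay(repeat, delay_fn, log_fn):
  """Run delay_fn once, or repeat times with progress logging.

  """
  if repeat == 0:
    delay_fn()
  else:
    top_value = repeat - 1
    for i in range(repeat):
      log_fn("Test delay iteration %d/%d" % (i, top_value))
      delay_fn()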
11498 class LUTestJqueue(NoHooksLU):
11499 """Utility LU to test some aspects of the job queue.
11504 # Must be lower than default timeout for WaitForJobChange to see whether it
11505 # notices changed jobs
11506 _CLIENT_CONNECT_TIMEOUT = 20.0
11507 _CLIENT_CONFIRM_TIMEOUT = 60.0
11509 @classmethod
11510 def _NotifyUsingSocket(cls, cb, errcls):
11511 """Opens a Unix socket and waits for another program to connect.
11514 @param cb: Callback to send socket name to client
11515 @type errcls: class
11516 @param errcls: Exception class to use for errors
11519 # Using a temporary directory as there's no easy way to create temporary
11520 # sockets without writing a custom loop around tempfile.mktemp and
11521 # socket.bind
11522 tmpdir = tempfile.mkdtemp()
11523 try:
11524 tmpsock = utils.PathJoin(tmpdir, "sock")
11526 logging.debug("Creating temporary socket at %s", tmpsock)
11527 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11528 try:
11529 sock.bind(tmpsock)
11530 sock.listen(1)
11532 # Send details to client
11533 cb(tmpsock)
11535 # Wait for client to connect before continuing
11536 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11537 try:
11538 (conn, _) = sock.accept()
11539 except socket.error, err:
11540 raise errcls("Client didn't connect in time (%s)" % err)
11541 finally:
11542 sock.close()
11543 finally:
11544 # Remove as soon as client is connected
11545 shutil.rmtree(tmpdir)
11547 # Wait for client to close
11548 try:
11549 try:
11550 # pylint: disable-msg=E1101
11551 # Instance of '_socketobject' has no ... member
11552 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11553 conn.recv(1)
11554 except socket.error, err:
11555 raise errcls("Client failed to confirm notification (%s)" % err)
11556 finally:
11557 conn.close()
11559 def _SendNotification(self, test, arg, sockname):
11560 """Sends a notification to the client.
11563 @param test: Test name
11564 @param arg: Test argument (depends on test)
11565 @type sockname: string
11566 @param sockname: Socket path
11569 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11571 def _Notify(self, prereq, test, arg):
11572 """Notifies the client of a test.
11575 @param prereq: Whether this is a prereq-phase test
11577 @param test: Test name
11578 @param arg: Test argument (depends on test)
11581 if prereq:
11582 errcls = errors.OpPrereqError
11583 else:
11584 errcls = errors.OpExecError
11586 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11587 test, arg),
11588 errcls)
11590 def CheckArguments(self):
11591 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11592 self.expandnames_calls = 0
11594 def ExpandNames(self):
11595 checkargs_calls = getattr(self, "checkargs_calls", 0)
11596 if checkargs_calls < 1:
11597 raise errors.ProgrammerError("CheckArguments was not called")
11599 self.expandnames_calls += 1
11601 if self.op.notify_waitlock:
11602 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11604 self.LogInfo("Expanding names")
11606 # Get lock on master node (just to get a lock, not for a particular reason)
11607 self.needed_locks = {
11608 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11609 }
11611 def Exec(self, feedback_fn):
11612 if self.expandnames_calls < 1:
11613 raise errors.ProgrammerError("ExpandNames was not called")
11615 if self.op.notify_exec:
11616 self._Notify(False, constants.JQT_EXEC, None)
11618 self.LogInfo("Executing")
11620 if self.op.log_messages:
11621 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11622 for idx, msg in enumerate(self.op.log_messages):
11623 self.LogInfo("Sending log message %s", idx + 1)
11624 feedback_fn(constants.JQT_MSGPREFIX + msg)
11625 # Report how many test messages have been sent
11626 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11628 if self.op.fail:
11629 raise errors.OpExecError("Opcode failure was requested")
11631 return True
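# Stand-alone sketch of the notification technique used by
# LUTestJqueue._NotifyUsingSocket above: create a listening socket inside a
# private temporary directory, hand its path to the peer via a callback and
# wait (with a timeout) for the peer to connect. Error handling is reduced to
# the bare minimum; the real method above is the authoritative version.
def _ExampleWaitForClient(callback, connect_timeout=20.0):
  """Create a temporary Unix socket, announce it and wait for one connection.

  """
  tmpdir = tempfile.mkdtemp()
  try:
    sockname = utils.PathJoin(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(sockname)
      sock.listen(1)
      # Tell the peer where to connect
      callback(sockname)
      sock.settimeout(connect_timeout)
      (conn, _) = sock.accept()
    finally:
      sock.close()
  finally:
    # The directory is no longer needed once the peer is connected (or we gave up)
    shutil.rmtree(tmpdir)
  return conn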
11634 class IAllocator(object):
11635 """IAllocator framework.
11637 An IAllocator instance has four sets of attributes:
11638 - cfg that is needed to query the cluster
11639 - input data (all members of the _KEYS class attribute are required)
11640 - four buffer attributes (in|out data|text), that represent the
11641 input (to the external script) in text and data structure format,
11642 and the output from it, again in two formats
11643 - the result variables from the script (success, info, result) for
11644 easy usage
11647 # pylint: disable-msg=R0902
11648 # lots of instance attributes
11650 "name", "mem_size", "disks", "disk_template",
11651 "os", "tags", "nics", "vcpus", "hypervisor",
11654 "name", "relocate_from",
11660 def __init__(self, cfg, rpc, mode, **kwargs):
11661 self.cfg = cfg
11662 self.rpc = rpc
11663 # init buffer variables
11664 self.in_text = self.out_text = self.in_data = self.out_data = None
11665 # init all input fields so that pylint is happy
11666 self.mode = mode
11667 self.mem_size = self.disks = self.disk_template = None
11668 self.os = self.tags = self.nics = self.vcpus = None
11669 self.hypervisor = None
11670 self.relocate_from = None
11671 self.name = None
11672 self.evac_nodes = None
11673 # computed fields
11674 self.required_nodes = None
11675 # init result fields
11676 self.success = self.info = self.result = None
11677 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11678 keyset = self._ALLO_KEYS
11679 fn = self._AddNewInstance
11680 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11681 keyset = self._RELO_KEYS
11682 fn = self._AddRelocateInstance
11683 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11684 keyset = self._EVAC_KEYS
11685 fn = self._AddEvacuateNodes
11687 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11688 " IAllocator" % self.mode)
11690 if key not in keyset:
11691 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11692 " IAllocator" % key)
11693 setattr(self, key, kwargs[key])
11695 for key in keyset:
11696 if key not in kwargs:
11697 raise errors.ProgrammerError("Missing input parameter '%s' to"
11698 " IAllocator" % key)
11699 self._BuildInputData(fn)
11701 def _ComputeClusterData(self):
11702 """Compute the generic allocator input data.
11704 This is the data that is independent of the actual operation.
11707 cfg = self.cfg
11708 cluster_info = cfg.GetClusterInfo()
11710 data = {
11711 "version": constants.IALLOCATOR_VERSION,
11712 "cluster_name": cfg.GetClusterName(),
11713 "cluster_tags": list(cluster_info.GetTags()),
11714 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11715 # we don't have job IDs
11716 }
11717 ninfo = cfg.GetAllNodesInfo()
11718 iinfo = cfg.GetAllInstancesInfo().values()
11719 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11722 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11724 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11725 hypervisor_name = self.hypervisor
11726 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11727 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11728 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11729 hypervisor_name = cluster_info.enabled_hypervisors[0]
11731 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11732 hypervisor_name)
11733 node_iinfo = \
11734 self.rpc.call_all_instances_info(node_list,
11735 cluster_info.enabled_hypervisors)
11737 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11739 config_ndata = self._ComputeBasicNodeData(ninfo)
11740 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11741 i_list, config_ndata)
11742 assert len(data["nodes"]) == len(ninfo), \
11743 "Incomplete node data computed"
11745 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11747 self.in_data = data
11749 @staticmethod
11750 def _ComputeNodeGroupData(cfg):
11751 """Compute node groups data.
11754 ng = {}
11755 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11756 ng[guuid] = {
11757 "name": gdata.name,
11758 "alloc_policy": gdata.alloc_policy,
11759 }
11760 return ng
11762 @staticmethod
11763 def _ComputeBasicNodeData(node_cfg):
11764 """Compute global node data.
11767 @returns: a dict of name: (node dict, node config)
11770 node_results = {}
11771 for ninfo in node_cfg.values():
11772 # fill in static (config-based) values
11773 pnr = {
11774 "tags": list(ninfo.GetTags()),
11775 "primary_ip": ninfo.primary_ip,
11776 "secondary_ip": ninfo.secondary_ip,
11777 "offline": ninfo.offline,
11778 "drained": ninfo.drained,
11779 "master_candidate": ninfo.master_candidate,
11780 "group": ninfo.group,
11781 "master_capable": ninfo.master_capable,
11782 "vm_capable": ninfo.vm_capable,
11785 node_results[ninfo.name] = pnr
11787 return node_results
11789 @staticmethod
11790 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11791 node_results):
11792 """Compute global node data.
11794 @param node_results: the basic node structures as filled from the config
11797 # make a copy of the current dict
11798 node_results = dict(node_results)
11799 for nname, nresult in node_data.items():
11800 assert nname in node_results, "Missing basic data for node %s" % nname
11801 ninfo = node_cfg[nname]
11803 if not (ninfo.offline or ninfo.drained):
11804 nresult.Raise("Can't get data for node %s" % nname)
11805 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11806 nname)
11807 remote_info = nresult.payload
11809 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11810 'vg_size', 'vg_free', 'cpu_total']:
11811 if attr not in remote_info:
11812 raise errors.OpExecError("Node '%s' didn't return attribute"
11813 " '%s'" % (nname, attr))
11814 if not isinstance(remote_info[attr], int):
11815 raise errors.OpExecError("Node '%s' returned invalid value"
11816 " for '%s': %s" %
11817 (nname, attr, remote_info[attr]))
11818 # compute memory used by primary instances
11819 i_p_mem = i_p_up_mem = 0
11820 for iinfo, beinfo in i_list:
11821 if iinfo.primary_node == nname:
11822 i_p_mem += beinfo[constants.BE_MEMORY]
11823 if iinfo.name not in node_iinfo[nname].payload:
11824 i_used_mem = 0
11825 else:
11826 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11827 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11828 remote_info['memory_free'] -= max(0, i_mem_diff)
11830 if iinfo.admin_up:
11831 i_p_up_mem += beinfo[constants.BE_MEMORY]
11833 # compute memory used by instances
11834 pnr_dyn = {
11835 "total_memory": remote_info['memory_total'],
11836 "reserved_memory": remote_info['memory_dom0'],
11837 "free_memory": remote_info['memory_free'],
11838 "total_disk": remote_info['vg_size'],
11839 "free_disk": remote_info['vg_free'],
11840 "total_cpus": remote_info['cpu_total'],
11841 "i_pri_memory": i_p_mem,
11842 "i_pri_up_memory": i_p_up_mem,
11844 pnr_dyn.update(node_results[nname])
11845 node_results[nname] = pnr_dyn
11847 return node_results
11849 @staticmethod
11850 def _ComputeInstanceData(cluster_info, i_list):
11851 """Compute global instance data.
11854 instance_data = {}
11855 for iinfo, beinfo in i_list:
11856 nic_data = []
11857 for nic in iinfo.nics:
11858 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11859 nic_dict = {"mac": nic.mac,
11860 "ip": nic.ip,
11861 "mode": filled_params[constants.NIC_MODE],
11862 "link": filled_params[constants.NIC_LINK],
11864 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11865 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11866 nic_data.append(nic_dict)
11868 "tags": list(iinfo.GetTags()),
11869 "admin_up": iinfo.admin_up,
11870 "vcpus": beinfo[constants.BE_VCPUS],
11871 "memory": beinfo[constants.BE_MEMORY],
11873 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11875 "disks": [{constants.IDISK_SIZE: dsk.size,
11876 constants.IDISK_MODE: dsk.mode}
11877 for dsk in iinfo.disks],
11878 "disk_template": iinfo.disk_template,
11879 "hypervisor": iinfo.hypervisor,
11881 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11883 instance_data[iinfo.name] = pir
11885 return instance_data
11887 def _AddNewInstance(self):
11888 """Add new instance data to allocator structure.
11890 This in combination with _ComputeClusterData will create the
11891 correct structure needed as input for the allocator.
11893 The checks for the completeness of the opcode must have already been
11894 done.
11897 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11899 if self.disk_template in constants.DTS_INT_MIRROR:
11900 self.required_nodes = 2
11901 else:
11902 self.required_nodes = 1
11905 "disk_template": self.disk_template,
11908 "vcpus": self.vcpus,
11909 "memory": self.mem_size,
11910 "disks": self.disks,
11911 "disk_space_total": disk_space,
11913 "required_nodes": self.required_nodes,
11917 def _AddRelocateInstance(self):
11918 """Add relocate instance data to allocator structure.
11920 This in combination with _ComputeClusterData will create the
11921 correct structure needed as input for the allocator.
11923 The checks for the completeness of the opcode must have already been
11924 done.
11927 instance = self.cfg.GetInstanceInfo(self.name)
11928 if instance is None:
11929 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11930 " IAllocator" % self.name)
11932 if instance.disk_template not in constants.DTS_MIRRORED:
11933 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11934 errors.ECODE_INVAL)
11936 if instance.disk_template in constants.DTS_INT_MIRROR and \
11937 len(instance.secondary_nodes) != 1:
11938 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11939 errors.ECODE_STATE)
11941 self.required_nodes = 1
11942 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11943 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11947 "disk_space_total": disk_space,
11948 "required_nodes": self.required_nodes,
11949 "relocate_from": self.relocate_from,
11953 def _AddEvacuateNodes(self):
11954 """Add evacuate nodes data to allocator structure.
11958 "evac_nodes": self.evac_nodes
11962 def _BuildInputData(self, fn):
11963 """Build input data structures.
11966 self._ComputeClusterData()
11969 request["type"] = self.mode
11970 self.in_data["request"] = request
11972 self.in_text = serializer.Dump(self.in_data)
11974 def Run(self, name, validate=True, call_fn=None):
11975 """Run an instance allocator and return the results.
11978 if call_fn is None:
11979 call_fn = self.rpc.call_iallocator_runner
11981 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11982 result.Raise("Failure while running the iallocator script")
11984 self.out_text = result.payload
11985 if validate:
11986 self._ValidateResult()
11988 def _ValidateResult(self):
11989 """Process the allocator results.
11991 This will process and, if successful, save the result in
11992 self.out_data and the other result attributes.
11995 try:
11996 rdict = serializer.Load(self.out_text)
11997 except Exception, err:
11998 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12000 if not isinstance(rdict, dict):
12001 raise errors.OpExecError("Can't parse iallocator results: not a dict")
12003 # TODO: remove backwards compatibility in later versions
12004 if "nodes" in rdict and "result" not in rdict:
12005 rdict["result"] = rdict["nodes"]
12008 for key in "success", "info", "result":
12009 if key not in rdict:
12010 raise errors.OpExecError("Can't parse iallocator results:"
12011 " missing key '%s'" % key)
12012 setattr(self, key, rdict[key])
12014 if not isinstance(rdict["result"], list):
12015 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
12016 " is not a list")
12018 if self.mode == constants.IALLOCATOR_MODE_RELOC:
12019 assert self.relocate_from is not None
12020 assert self.required_nodes == 1
12022 node2group = dict((name, ndata["group"])
12023 for (name, ndata) in self.in_data["nodes"].items())
12025 fn = compat.partial(self._NodesToGroups, node2group,
12026 self.in_data["nodegroups"])
12028 request_groups = fn(self.relocate_from)
12029 result_groups = fn(rdict["result"])
12031 if result_groups != request_groups:
12032 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12033 " differ from original groups (%s)" %
12034 (utils.CommaJoin(result_groups),
12035 utils.CommaJoin(request_groups)))
12037 self.out_data = rdict
12039 @staticmethod
12040 def _NodesToGroups(node2group, groups, nodes):
12041 """Returns a list of unique group names for a list of nodes.
12043 @type node2group: dict
12044 @param node2group: Map from node name to group UUID
12045 @type groups: dict
12046 @param groups: Group information
12047 @type nodes: list
12048 @param nodes: Node names
12051 result = set()
12053 for node in nodes:
12054 try:
12055 group_uuid = node2group[node]
12056 except KeyError:
12057 # Ignore unknown node
12058 pass
12059 else:
12060 try:
12061 group = groups[group_uuid]
12062 except KeyError:
12063 # Can't find group, let's use UUID
12064 group_name = group_uuid
12065 else:
12066 group_name = group["name"]
12068 result.add(group_name)
12070 return sorted(result)
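# Sketch of the reply validation done in IAllocator._ValidateResult above,
# applied to a plain serialized string: parse it with ganeti.serializer,
# require the well-known keys and check the type of the "result" entry. The
# reply format shown is inferred from the checks above, not a full
# specification of the iallocator protocol.
def _ExampleValidateIAllocatorReply(text):
  """Parse and minimally validate an iallocator-style reply.

  """
  rdict = serializer.Load(text)
  if not isinstance(rdict, dict):
    raise ValueError("Reply is not a dict")
  for key in ("success", "info", "result"):
    if key not in rdict:
      raise ValueError("Missing key '%s'" % key)
  if not isinstance(rdict["result"], list):
    raise ValueError("'result' key is not a list")
  return rdict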
12073 class LUTestAllocator(NoHooksLU):
12074 """Run allocator tests.
12076 This LU runs the allocator tests
12079 def CheckPrereq(self):
12080 """Check prerequisites.
12082 This checks the opcode parameters depending on the requested direction and mode.
12085 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12086 for attr in ["mem_size", "disks", "disk_template",
12087 "os", "tags", "nics", "vcpus"]:
12088 if not hasattr(self.op, attr):
12089 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12090 attr, errors.ECODE_INVAL)
12091 iname = self.cfg.ExpandInstanceName(self.op.name)
12092 if iname is not None:
12093 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12094 iname, errors.ECODE_EXISTS)
12095 if not isinstance(self.op.nics, list):
12096 raise errors.OpPrereqError("Invalid parameter 'nics'",
12097 errors.ECODE_INVAL)
12098 if not isinstance(self.op.disks, list):
12099 raise errors.OpPrereqError("Invalid parameter 'disks'",
12100 errors.ECODE_INVAL)
12101 for row in self.op.disks:
12102 if (not isinstance(row, dict) or
12103 "size" not in row or
12104 not isinstance(row["size"], int) or
12105 "mode" not in row or
12106 row["mode"] not in ['r', 'w']):
12107 raise errors.OpPrereqError("Invalid contents of the 'disks'"
12108 " parameter", errors.ECODE_INVAL)
12109 if self.op.hypervisor is None:
12110 self.op.hypervisor = self.cfg.GetHypervisorType()
12111 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12112 fname = _ExpandInstanceName(self.cfg, self.op.name)
12113 self.op.name = fname
12114 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12115 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12116 if not hasattr(self.op, "evac_nodes"):
12117 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12118 " opcode input", errors.ECODE_INVAL)
12119 else:
12120 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12121 self.op.mode, errors.ECODE_INVAL)
12123 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12124 if self.op.allocator is None:
12125 raise errors.OpPrereqError("Missing allocator name",
12126 errors.ECODE_INVAL)
12127 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12128 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12129 self.op.direction, errors.ECODE_INVAL)
12131 def Exec(self, feedback_fn):
12132 """Run the allocator test.
12135 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12136 ial = IAllocator(self.cfg, self.rpc,
12137 mode=self.op.mode,
12138 name=self.op.name,
12139 mem_size=self.op.mem_size,
12140 disks=self.op.disks,
12141 disk_template=self.op.disk_template,
12142 os=self.op.os,
12143 tags=self.op.tags,
12144 nics=self.op.nics,
12145 vcpus=self.op.vcpus,
12146 hypervisor=self.op.hypervisor,
12147 )
12148 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12149 ial = IAllocator(self.cfg, self.rpc,
12150 mode=self.op.mode,
12151 name=self.op.name,
12152 relocate_from=list(self.relocate_from),
12153 )
12154 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12155 ial = IAllocator(self.cfg, self.rpc,
12156 mode=self.op.mode,
12157 evac_nodes=self.op.evac_nodes)
12158 else:
12159 raise errors.ProgrammerError("Unhandled mode %s in"
12160 " LUTestAllocator.Exec", self.op.mode)
12162 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12163 result = ial.in_text
12164 else:
12165 ial.Run(self.op.allocator, validate=False)
12166 result = ial.out_text
12167 return result
12170 #: Query type implementations
12171 _QUERY_IMPL = {
12172 constants.QR_INSTANCE: _InstanceQuery,
12173 constants.QR_NODE: _NodeQuery,
12174 constants.QR_GROUP: _GroupQuery,
12175 constants.QR_OS: _OsQuery,
12176 }
12178 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12181 def _GetQueryImplementation(name):
12182 """Returns the implemtnation for a query type.
12184 @param name: Query type, must be one of L{constants.QR_VIA_OP}
12187 try:
12188 return _QUERY_IMPL[name]
12189 except KeyError:
12190 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12191 errors.ECODE_INVAL)