code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have way too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 class ResultWithJobs:
  78   """Data container for LU results with jobs.
  79
  80   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  81   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  82   contained in the C{jobs} attribute and include the job IDs in the opcode
  83   result.
  84
  85   """
  86   def __init__(self, jobs, **kwargs):
  87     """Initializes this class.
  88
  89     Additional return values can be specified as keyword arguments.
  90
  91     @type jobs: list of lists of L{opcode.OpCode}
  92     @param jobs: A list of lists of opcode objects
  93
  94     """
  95     self.jobs = jobs
  96     self.other = kwargs
  97
  98
  99 class LogicalUnit(object):
 100   """Logical Unit base class.
 101
 102   Subclasses must follow these rules:
 103     - implement ExpandNames
 104     - implement CheckPrereq (except when tasklets are used)
 105     - implement Exec (except when tasklets are used)
 106     - implement BuildHooksEnv
 107     - implement BuildHooksNodes
 108     - redefine HPATH and HTYPE
 109     - optionally redefine their run requirements:
 110         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 111
 112   Note that all commands require root permissions.
 113
 114   @ivar dry_run_result: the value (if any) that will be returned to the caller
 115       in dry-run mode (signalled by opcode dry_run parameter)
 116
 117   """
 118   HPATH = None
 119   HTYPE = None
 120   REQ_BGL = True
 121
 122   def __init__(self, processor, op, context, rpc):
 123     """Constructor for LogicalUnit.
 124
 125     This needs to be overridden in derived classes in order to check op
 126     validity.
 127
 128     """
 129     self.proc = processor
 130     self.op = op
 131     self.cfg = context.cfg
 132     self.context = context
 133     self.rpc = rpc
 134     # Dicts used to declare locking needs to mcpu
 135     self.needed_locks = None
 136     self.acquired_locks = {}
 137     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 138     self.add_locks = {}
 139     self.remove_locks = {}
 140     # Used to force good behavior when calling helper functions
 141     self.recalculate_locks = {}
 142     self.__ssh = None
 143     # logging
 144     self.Log = processor.Log # pylint: disable-msg=C0103
 145     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 146     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 147     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 148     # support for dry-run
 149     self.dry_run_result = None
 150     # support for generic debug attribute
 151     if (not hasattr(self.op, "debug_level") or
 152         not isinstance(self.op.debug_level, int)):
 153       self.op.debug_level = 0
 154
 155     # Tasklets
 156     self.tasklets = None
 157
 158     # Validate opcode parameters and set defaults
 159     self.op.Validate(True)
 160
 161     self.CheckArguments()
 162
 163   def __GetSSH(self):
 164     """Returns the SshRunner object
 165
 166     """
 167     if not self.__ssh:
 168       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 169     return self.__ssh
 170
 171   ssh = property(fget=__GetSSH)
 172
 173   def CheckArguments(self):
 174     """Check syntactic validity for the opcode arguments.
 175
 176     This method is for doing a simple syntactic check and ensure
 177     validity of opcode parameters, without any cluster-related
 178     checks. While the same can be accomplished in ExpandNames and/or
 179     CheckPrereq, doing these separate is better because:
 180
 181       - ExpandNames is left as as purely a lock-related function
 182       - CheckPrereq is run after we have acquired locks (and possible
 183         waited for them)
 184
 185     The function is allowed to change the self.op attribute so that
 186     later methods can no longer worry about missing parameters.
 187
 188     """
 189     pass
 190
 191   def ExpandNames(self):
 192     """Expand names for this LU.
 193
 194     This method is called before starting to execute the opcode, and it should
 195     update all the parameters of the opcode to their canonical form (e.g. a
 196     short node name must be fully expanded after this method has successfully
 197     completed). This way locking, hooks, logging, etc. can work correctly.
 198
 199     LUs which implement this method must also populate the self.needed_locks
 200     member, as a dict with lock levels as keys, and a list of needed lock names
 201     as values. Rules:
 202
 203       - use an empty dict if you don't need any lock
 204       - if you don't need any lock at a particular level omit that level
 205       - don't put anything for the BGL level
 206       - if you want all locks at a level use locking.ALL_SET as a value
 207
 208     If you need to share locks (rather than acquire them exclusively) at one
 209     level you can modify self.share_locks, setting a true value (usually 1) for
 210     that level. By default locks are not shared.
 211
 212     This function can also define a list of tasklets, which then will be
 213     executed in order instead of the usual LU-level CheckPrereq and Exec
 214     functions, if those are not defined by the LU.
 215
 216     Examples::
 217
 218       # Acquire all nodes and one instance
 219       self.needed_locks = {
 220         locking.LEVEL_NODE: locking.ALL_SET,
 221         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 222       }
 223       # Acquire just two nodes
 224       self.needed_locks = {
 225         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 226       }
 227       # Acquire no locks
 228       self.needed_locks = {} # No, you can't leave it to the default value None
 229
 230     """
 231     # The implementation of this method is mandatory only if the new LU is
 232     # concurrent, so that old LUs don't need to be changed all at the same
 233     # time.
 234     if self.REQ_BGL:
 235       self.needed_locks = {} # Exclusive LUs don't need locks.
 236     else:
 237       raise NotImplementedError
 238
 239   def DeclareLocks(self, level):
 240     """Declare LU locking needs for a level
 241
 242     While most LUs can just declare their locking needs at ExpandNames time,
 243     sometimes there's the need to calculate some locks after having acquired
 244     the ones before. This function is called just before acquiring locks at a
 245     particular level, but after acquiring the ones at lower levels, and permits
 246     such calculations. It can be used to modify self.needed_locks, and by
 247     default it does nothing.
 248
 249     This function is only called if you have something already set in
 250     self.needed_locks for the level.
 251
 252     @param level: Locking level which is going to be locked
 253     @type level: member of ganeti.locking.LEVELS
 254
 255     """
 256
 257   def CheckPrereq(self):
 258     """Check prerequisites for this LU.
 259
 260     This method should check that the prerequisites for the execution
 261     of this LU are fulfilled. It can do internode communication, but
 262     it should be idempotent - no cluster or system changes are
 263     allowed.
 264
 265     The method should raise errors.OpPrereqError in case something is
 266     not fulfilled. Its return value is ignored.
 267
 268     This method should also update all the parameters of the opcode to
 269     their canonical form if it hasn't been done by ExpandNames before.
 270
 271     """
 272     if self.tasklets is not None:
 273       for (idx, tl) in enumerate(self.tasklets):
 274         logging.debug("Checking prerequisites for tasklet %s/%s",
 275                       idx + 1, len(self.tasklets))
 276         tl.CheckPrereq()
 277     else:
 278       pass
 279
 280   def Exec(self, feedback_fn):
 281     """Execute the LU.
 282
 283     This method should implement the actual work. It should raise
 284     errors.OpExecError for failures that are somewhat dealt with in
 285     code, or expected.
 286
 287     """
 288     if self.tasklets is not None:
 289       for (idx, tl) in enumerate(self.tasklets):
 290         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 291         tl.Exec(feedback_fn)
 292     else:
 293       raise NotImplementedError
 294
 295   def BuildHooksEnv(self):
 296     """Build hooks environment for this LU.
 297
 298     @rtype: dict
 299     @return: Dictionary containing the environment that will be used for
 300       running the hooks for this LU. The keys of the dict must not be prefixed
 301       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 302       will extend the environment with additional variables. If no environment
 303       should be defined, an empty dictionary should be returned (not C{None}).
 304     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 305       will not be called.
 306
 307     """
 308     raise NotImplementedError
 309
 310   def BuildHooksNodes(self):
 311     """Build list of nodes to run LU's hooks.
 312
 313     @rtype: tuple; (list, list)
 314     @return: Tuple containing a list of node names on which the hook
 315       should run before the execution and a list of node names on which the
 316       hook should run after the execution. No nodes should be returned as an
 317       empty list (and not None).
 318     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 319       will not be called.
 320
 321     """
 322     raise NotImplementedError
 323
 324   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 325     """Notify the LU about the results of its hooks.
 326
 327     This method is called every time a hooks phase is executed, and notifies
 328     the Logical Unit about the hooks' result. The LU can then use it to alter
 329     its result based on the hooks.  By default the method does nothing and the
 330     previous result is passed back unchanged but any LU can define it if it
 331     wants to use the local cluster hook-scripts somehow.
 332
 333     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 334         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 335     @param hook_results: the results of the multi-node hooks rpc call
 336     @param feedback_fn: function used send feedback back to the caller
 337     @param lu_result: the previous Exec result this LU had, or None
 338         in the PRE phase
 339     @return: the new Exec result, based on the previous result
 340         and hook results
 341
 342     """
 343     # API must be kept, thus we ignore the unused argument and could
 344     # be a function warnings
 345     # pylint: disable-msg=W0613,R0201
 346     return lu_result
 347
 348   def _ExpandAndLockInstance(self):
 349     """Helper function to expand and lock an instance.
 350
 351     Many LUs that work on an instance take its name in self.op.instance_name
 352     and need to expand it and then declare the expanded name for locking. This
 353     function does it, and then updates self.op.instance_name to the expanded
 354     name. It also initializes needed_locks as a dict, if this hasn't been done
 355     before.
 356
 357     """
 358     if self.needed_locks is None:
 359       self.needed_locks = {}
 360     else:
 361       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 362         "_ExpandAndLockInstance called with instance-level locks set"
 363     self.op.instance_name = _ExpandInstanceName(self.cfg,
 364                                                 self.op.instance_name)
 365     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 366
 367   def _LockInstancesNodes(self, primary_only=False):
 368     """Helper function to declare instances' nodes for locking.
 369
 370     This function should be called after locking one or more instances to lock
 371     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 372     with all primary or secondary nodes for instances already locked and
 373     present in self.needed_locks[locking.LEVEL_INSTANCE].
 374
 375     It should be called from DeclareLocks, and for safety only works if
 376     self.recalculate_locks[locking.LEVEL_NODE] is set.
 377
 378     In the future it may grow parameters to just lock some instance's nodes, or
 379     to just lock primaries or secondary nodes, if needed.
 380
 381     If should be called in DeclareLocks in a way similar to::
 382
 383       if level == locking.LEVEL_NODE:
 384         self._LockInstancesNodes()
 385
 386     @type primary_only: boolean
 387     @param primary_only: only lock primary nodes of locked instances
 388
 389     """
 390     assert locking.LEVEL_NODE in self.recalculate_locks, \
 391       "_LockInstancesNodes helper function called with no nodes to recalculate"
 392
 393     # TODO: check if we're really been called with the instance locks held
 394
 395     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 396     # future we might want to have different behaviors depending on the value
 397     # of self.recalculate_locks[locking.LEVEL_NODE]
 398     wanted_nodes = []
 399     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 400       instance = self.context.cfg.GetInstanceInfo(instance_name)
 401       wanted_nodes.append(instance.primary_node)
 402       if not primary_only:
 403         wanted_nodes.extend(instance.secondary_nodes)
 404
 405     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 406       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 407     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 408       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 409
 410     del self.recalculate_locks[locking.LEVEL_NODE]
 411
 412
 413 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 414   """Simple LU which runs no hooks.
 415
 416   This LU is intended as a parent for other LogicalUnits which will
 417   run no hooks, in order to reduce duplicate code.
 418
 419   """
 420   HPATH = None
 421   HTYPE = None
 422
 423   def BuildHooksEnv(self):
 424     """Empty BuildHooksEnv for NoHooksLu.
 425
 426     This just raises an error.
 427
 428     """
 429     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 430
 431   def BuildHooksNodes(self):
 432     """Empty BuildHooksNodes for NoHooksLU.
 433
 434     """
 435     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 436
 437
 438 class Tasklet:
 439   """Tasklet base class.
 440
 441   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 442   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 443   tasklets know nothing about locks.
 444
 445   Subclasses must follow these rules:
 446     - Implement CheckPrereq
 447     - Implement Exec
 448
 449   """
 450   def __init__(self, lu):
 451     self.lu = lu
 452
 453     # Shortcuts
 454     self.cfg = lu.cfg
 455     self.rpc = lu.rpc
 456
 457   def CheckPrereq(self):
 458     """Check prerequisites for this tasklets.
 459
 460     This method should check whether the prerequisites for the execution of
 461     this tasklet are fulfilled. It can do internode communication, but it
 462     should be idempotent - no cluster or system changes are allowed.
 463
 464     The method should raise errors.OpPrereqError in case something is not
 465     fulfilled. Its return value is ignored.
 466
 467     This method should also update all parameters to their canonical form if it
 468     hasn't been done before.
 469
 470     """
 471     pass
 472
 473   def Exec(self, feedback_fn):
 474     """Execute the tasklet.
 475
 476     This method should implement the actual work. It should raise
 477     errors.OpExecError for failures that are somewhat dealt with in code, or
 478     expected.
 479
 480     """
 481     raise NotImplementedError
 482
 483
 484 class _QueryBase:
 485   """Base for query utility classes.
 486
 487   """
 488   #: Attribute holding field definitions
 489   FIELDS = None
 490
 491   def __init__(self, filter_, fields, use_locking):
 492     """Initializes this class.
 493
 494     """
 495     self.use_locking = use_locking
 496
 497     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 498                              namefield="name")
 499     self.requested_data = self.query.RequestedData()
 500     self.names = self.query.RequestedNames()
 501
 502     # Sort only if no names were requested
 503     self.sort_by_name = not self.names
 504
 505     self.do_locking = None
 506     self.wanted = None
 507
 508   def _GetNames(self, lu, all_names, lock_level):
 509     """Helper function to determine names asked for in the query.
 510
 511     """
 512     if self.do_locking:
 513       names = lu.acquired_locks[lock_level]
 514     else:
 515       names = all_names
 516
 517     if self.wanted == locking.ALL_SET:
 518       assert not self.names
 519       # caller didn't specify names, so ordering is not important
 520       return utils.NiceSort(names)
 521
 522     # caller specified names and we must keep the same order
 523     assert self.names
 524     assert not self.do_locking or lu.acquired_locks[lock_level]
 525
 526     missing = set(self.wanted).difference(names)
 527     if missing:
 528       raise errors.OpExecError("Some items were removed before retrieving"
 529                                " their data: %s" % missing)
 530
 531     # Return expanded names
 532     return self.wanted
 533
 534   def ExpandNames(self, lu):
 535     """Expand names for this query.
 536
 537     See L{LogicalUnit.ExpandNames}.
 538
 539     """
 540     raise NotImplementedError()
 541
 542   def DeclareLocks(self, lu, level):
 543     """Declare locks for this query.
 544
 545     See L{LogicalUnit.DeclareLocks}.
 546
 547     """
 548     raise NotImplementedError()
 549
 550   def _GetQueryData(self, lu):
 551     """Collects all data for this query.
 552
 553     @return: Query data object
 554
 555     """
 556     raise NotImplementedError()
 557
 558   def NewStyleQuery(self, lu):
 559     """Collect data and execute query.
 560
 561     """
 562     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 563                                   sort_by_name=self.sort_by_name)
 564
 565   def OldStyleQuery(self, lu):
 566     """Collect data and execute query.
 567
 568     """
 569     return self.query.OldStyleQuery(self._GetQueryData(lu),
 570                                     sort_by_name=self.sort_by_name)
 571
 572
 573 def _GetWantedNodes(lu, nodes):
 574   """Returns list of checked and expanded node names.
 575
 576   @type lu: L{LogicalUnit}
 577   @param lu: the logical unit on whose behalf we execute
 578   @type nodes: list
 579   @param nodes: list of node names or None for all nodes
 580   @rtype: list
 581   @return: the list of nodes, sorted
 582   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 583
 584   """
 585   if nodes:
 586     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 587
 588   return utils.NiceSort(lu.cfg.GetNodeList())
 589
 590
 591 def _GetWantedInstances(lu, instances):
 592   """Returns list of checked and expanded instance names.
 593
 594   @type lu: L{LogicalUnit}
 595   @param lu: the logical unit on whose behalf we execute
 596   @type instances: list
 597   @param instances: list of instance names or None for all instances
 598   @rtype: list
 599   @return: the list of instances, sorted
 600   @raise errors.OpPrereqError: if the instances parameter is wrong type
 601   @raise errors.OpPrereqError: if any of the passed instances is not found
 602
 603   """
 604   if instances:
 605     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 606   else:
 607     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 608   return wanted
 609
 610
 611 def _GetUpdatedParams(old_params, update_dict,
 612                       use_default=True, use_none=False):
 613   """Return the new version of a parameter dictionary.
 614
 615   @type old_params: dict
 616   @param old_params: old parameters
 617   @type update_dict: dict
 618   @param update_dict: dict containing new parameter values, or
 619       constants.VALUE_DEFAULT to reset the parameter to its default
 620       value
 621   @param use_default: boolean
 622   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 623       values as 'to be deleted' values
 624   @param use_none: boolean
 625   @type use_none: whether to recognise C{None} values as 'to be
 626       deleted' values
 627   @rtype: dict
 628   @return: the new parameter dictionary
 629
 630   """
 631   params_copy = copy.deepcopy(old_params)
 632   for key, val in update_dict.iteritems():
 633     if ((use_default and val == constants.VALUE_DEFAULT) or
 634         (use_none and val is None)):
 635       try:
 636         del params_copy[key]
 637       except KeyError:
 638         pass
 639     else:
 640       params_copy[key] = val
 641   return params_copy
 642
 643
 644 def _RunPostHook(lu, node_name):
 645   """Runs the post-hook for an opcode on a single node.
 646
 647   """
 648   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 649   try:
 650     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 651   except:
 652     # pylint: disable-msg=W0702
 653     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 654
 655
 656 def _CheckOutputFields(static, dynamic, selected):
 657   """Checks whether all selected fields are valid.
 658
 659   @type static: L{utils.FieldSet}
 660   @param static: static fields set
 661   @type dynamic: L{utils.FieldSet}
 662   @param dynamic: dynamic fields set
 663
 664   """
 665   f = utils.FieldSet()
 666   f.Extend(static)
 667   f.Extend(dynamic)
 668
 669   delta = f.NonMatching(selected)
 670   if delta:
 671     raise errors.OpPrereqError("Unknown output fields selected: %s"
 672                                % ",".join(delta), errors.ECODE_INVAL)
 673
 674
 675 def _CheckGlobalHvParams(params):
 676   """Validates that given hypervisor params are not global ones.
 677
 678   This will ensure that instances don't get customised versions of
 679   global params.
 680
 681   """
 682   used_globals = constants.HVC_GLOBALS.intersection(params)
 683   if used_globals:
 684     msg = ("The following hypervisor parameters are global and cannot"
 685            " be customized at instance level, please modify them at"
 686            " cluster level: %s" % utils.CommaJoin(used_globals))
 687     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 688
 689
 690 def _CheckNodeOnline(lu, node, msg=None):
 691   """Ensure that a given node is online.
 692
 693   @param lu: the LU on behalf of which we make the check
 694   @param node: the node to check
 695   @param msg: if passed, should be a message to replace the default one
 696   @raise errors.OpPrereqError: if the node is offline
 697
 698   """
 699   if msg is None:
 700     msg = "Can't use offline node"
 701   if lu.cfg.GetNodeInfo(node).offline:
 702     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 703
 704
 705 def _CheckNodeNotDrained(lu, node):
 706   """Ensure that a given node is not drained.
 707
 708   @param lu: the LU on behalf of which we make the check
 709   @param node: the node to check
 710   @raise errors.OpPrereqError: if the node is drained
 711
 712   """
 713   if lu.cfg.GetNodeInfo(node).drained:
 714     raise errors.OpPrereqError("Can't use drained node %s" % node,
 715                                errors.ECODE_STATE)
 716
 717
 718 def _CheckNodeVmCapable(lu, node):
 719   """Ensure that a given node is vm capable.
 720
 721   @param lu: the LU on behalf of which we make the check
 722   @param node: the node to check
 723   @raise errors.OpPrereqError: if the node is not vm capable
 724
 725   """
 726   if not lu.cfg.GetNodeInfo(node).vm_capable:
 727     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 728                                errors.ECODE_STATE)
 729
 730
 731 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 732   """Ensure that a node supports a given OS.
 733
 734   @param lu: the LU on behalf of which we make the check
 735   @param node: the node to check
 736   @param os_name: the OS to query about
 737   @param force_variant: whether to ignore variant errors
 738   @raise errors.OpPrereqError: if the node is not supporting the OS
 739
 740   """
 741   result = lu.rpc.call_os_get(node, os_name)
 742   result.Raise("OS '%s' not in supported OS list for node %s" %
 743                (os_name, node),
 744                prereq=True, ecode=errors.ECODE_INVAL)
 745   if not force_variant:
 746     _CheckOSVariant(result.payload, os_name)
 747
 748
 749 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 750   """Ensure that a node has the given secondary ip.
 751
 752   @type lu: L{LogicalUnit}
 753   @param lu: the LU on behalf of which we make the check
 754   @type node: string
 755   @param node: the node to check
 756   @type secondary_ip: string
 757   @param secondary_ip: the ip to check
 758   @type prereq: boolean
 759   @param prereq: whether to throw a prerequisite or an execute error
 760   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 761   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 762
 763   """
 764   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 765   result.Raise("Failure checking secondary ip on node %s" % node,
 766                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 767   if not result.payload:
 768     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 769            " please fix and re-run this command" % secondary_ip)
 770     if prereq:
 771       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 772     else:
 773       raise errors.OpExecError(msg)
 774
 775
 776 def _GetClusterDomainSecret():
 777   """Reads the cluster domain secret.
 778
 779   """
 780   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 781                                strict=True)
 782
 783
 784 def _CheckInstanceDown(lu, instance, reason):
 785   """Ensure that an instance is not running."""
 786   if instance.admin_up:
 787     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 788                                (instance.name, reason), errors.ECODE_STATE)
 789
 790   pnode = instance.primary_node
 791   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 792   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 793               prereq=True, ecode=errors.ECODE_ENVIRON)
 794
 795   if instance.name in ins_l.payload:
 796     raise errors.OpPrereqError("Instance %s is running, %s" %
 797                                (instance.name, reason), errors.ECODE_STATE)
 798
 799
 800 def _ExpandItemName(fn, name, kind):
 801   """Expand an item name.
 802
 803   @param fn: the function to use for expansion
 804   @param name: requested item name
 805   @param kind: text description ('Node' or 'Instance')
 806   @return: the resolved (full) name
 807   @raise errors.OpPrereqError: if the item is not found
 808
 809   """
 810   full_name = fn(name)
 811   if full_name is None:
 812     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 813                                errors.ECODE_NOENT)
 814   return full_name
 815
 816
 817 def _ExpandNodeName(cfg, name):
 818   """Wrapper over L{_ExpandItemName} for nodes."""
 819   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 820
 821
 822 def _ExpandInstanceName(cfg, name):
 823   """Wrapper over L{_ExpandItemName} for instance."""
 824   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 825
 826
 827 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 828                           memory, vcpus, nics, disk_template, disks,
 829                           bep, hvp, hypervisor_name):
 830   """Builds instance related env variables for hooks
 831
 832   This builds the hook environment from individual variables.
 833
 834   @type name: string
 835   @param name: the name of the instance
 836   @type primary_node: string
 837   @param primary_node: the name of the instance's primary node
 838   @type secondary_nodes: list
 839   @param secondary_nodes: list of secondary nodes as strings
 840   @type os_type: string
 841   @param os_type: the name of the instance's OS
 842   @type status: boolean
 843   @param status: the should_run status of the instance
 844   @type memory: string
 845   @param memory: the memory size of the instance
 846   @type vcpus: string
 847   @param vcpus: the count of VCPUs the instance has
 848   @type nics: list
 849   @param nics: list of tuples (ip, mac, mode, link) representing
 850       the NICs the instance has
 851   @type disk_template: string
 852   @param disk_template: the disk template of the instance
 853   @type disks: list
 854   @param disks: the list of (size, mode) pairs
 855   @type bep: dict
 856   @param bep: the backend parameters for the instance
 857   @type hvp: dict
 858   @param hvp: the hypervisor parameters for the instance
 859   @type hypervisor_name: string
 860   @param hypervisor_name: the hypervisor for the instance
 861   @rtype: dict
 862   @return: the hook environment for this instance
 863
 864   """
 865   if status:
 866     str_status = "up"
 867   else:
 868     str_status = "down"
 869   env = {
 870     "OP_TARGET": name,
 871     "INSTANCE_NAME": name,
 872     "INSTANCE_PRIMARY": primary_node,
 873     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 874     "INSTANCE_OS_TYPE": os_type,
 875     "INSTANCE_STATUS": str_status,
 876     "INSTANCE_MEMORY": memory,
 877     "INSTANCE_VCPUS": vcpus,
 878     "INSTANCE_DISK_TEMPLATE": disk_template,
 879     "INSTANCE_HYPERVISOR": hypervisor_name,
 880   }
 881
 882   if nics:
 883     nic_count = len(nics)
 884     for idx, (ip, mac, mode, link) in enumerate(nics):
 885       if ip is None:
 886         ip = ""
 887       env["INSTANCE_NIC%d_IP" % idx] = ip
 888       env["INSTANCE_NIC%d_MAC" % idx] = mac
 889       env["INSTANCE_NIC%d_MODE" % idx] = mode
 890       env["INSTANCE_NIC%d_LINK" % idx] = link
 891       if mode == constants.NIC_MODE_BRIDGED:
 892         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 893   else:
 894     nic_count = 0
 895
 896   env["INSTANCE_NIC_COUNT"] = nic_count
 897
 898   if disks:
 899     disk_count = len(disks)
 900     for idx, (size, mode) in enumerate(disks):
 901       env["INSTANCE_DISK%d_SIZE" % idx] = size
 902       env["INSTANCE_DISK%d_MODE" % idx] = mode
 903   else:
 904     disk_count = 0
 905
 906   env["INSTANCE_DISK_COUNT"] = disk_count
 907
 908   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 909     for key, value in source.items():
 910       env["INSTANCE_%s_%s" % (kind, key)] = value
 911
 912   return env
 913
 914
 915 def _NICListToTuple(lu, nics):
 916   """Build a list of nic information tuples.
 917
 918   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 919   value in LUInstanceQueryData.
 920
 921   @type lu:  L{LogicalUnit}
 922   @param lu: the logical unit on whose behalf we execute
 923   @type nics: list of L{objects.NIC}
 924   @param nics: list of nics to convert to hooks tuples
 925
 926   """
 927   hooks_nics = []
 928   cluster = lu.cfg.GetClusterInfo()
 929   for nic in nics:
 930     ip = nic.ip
 931     mac = nic.mac
 932     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 933     mode = filled_params[constants.NIC_MODE]
 934     link = filled_params[constants.NIC_LINK]
 935     hooks_nics.append((ip, mac, mode, link))
 936   return hooks_nics
 937
 938
 939 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 940   """Builds instance related env variables for hooks from an object.
 941
 942   @type lu: L{LogicalUnit}
 943   @param lu: the logical unit on whose behalf we execute
 944   @type instance: L{objects.Instance}
 945   @param instance: the instance for which we should build the
 946       environment
 947   @type override: dict
 948   @param override: dictionary with key/values that will override
 949       our values
 950   @rtype: dict
 951   @return: the hook environment dictionary
 952
 953   """
 954   cluster = lu.cfg.GetClusterInfo()
 955   bep = cluster.FillBE(instance)
 956   hvp = cluster.FillHV(instance)
 957   args = {
 958     'name': instance.name,
 959     'primary_node': instance.primary_node,
 960     'secondary_nodes': instance.secondary_nodes,
 961     'os_type': instance.os,
 962     'status': instance.admin_up,
 963     'memory': bep[constants.BE_MEMORY],
 964     'vcpus': bep[constants.BE_VCPUS],
 965     'nics': _NICListToTuple(lu, instance.nics),
 966     'disk_template': instance.disk_template,
 967     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 968     'bep': bep,
 969     'hvp': hvp,
 970     'hypervisor_name': instance.hypervisor,
 971   }
 972   if override:
 973     args.update(override)
 974   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 975
 976
 977 def _AdjustCandidatePool(lu, exceptions):
 978   """Adjust the candidate pool after node operations.
 979
 980   """
 981   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 982   if mod_list:
 983     lu.LogInfo("Promoted nodes to master candidate role: %s",
 984                utils.CommaJoin(node.name for node in mod_list))
 985     for name in mod_list:
 986       lu.context.ReaddNode(name)
 987   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 988   if mc_now > mc_max:
 989     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 990                (mc_now, mc_max))
 991
 992
 993 def _DecideSelfPromotion(lu, exceptions=None):
 994   """Decide whether I should promote myself as a master candidate.
 995
 996   """
 997   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 998   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 999   # the new node will increase mc_max with one, so:
1000   mc_should = min(mc_should + 1, cp_size)
1001   return mc_now < mc_should
1002
1003
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters are in bridged mode are considered;
  if there are none, no RPC call is made.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist

  """
  cluster = lu.cfg.GetClusterInfo()
  filled = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]

  bridges = []
  for params in filled:
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1016
1017
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node when C{None}

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1025
1026
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  Nothing is checked if the OS declares no supported variants.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name carries no variant, or the
      variant is not among the supported ones

  """
  if os_obj.supported_variants:
    variant = objects.OS.GetVariant(name)
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)

    if variant not in os_obj.supported_variants:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1045
1046
def _GetNodeInstancesInner(cfg, fn):
  """Return all configured instances for which C{fn} returns a true value.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param fn: predicate called with each instance object
  @rtype: list
  @return: the matching instance objects

  """
  selected = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)
  return selected
1049
1050
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: configuration object
  @param node_name: the node to look for in each instance's C{all_nodes}

  """
  def _OnNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _OnNode)
1057
1058
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: configuration object
  @param node_name: the node compared against each instance's
      C{primary_node}

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1065
1066
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: configuration object
  @param node_name: the node to look for in each instance's
      C{secondary_nodes}

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1073
1074
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: for file storage, a one-element list holding the list of
      storage directories; an empty list for any other type

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1085
1086
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's disks that are faulty on a node.

  The disk IDs are first set for the given node, then the mirror status
  is queried via RPC; a failed call is raised through C{result.Raise}.

  @param cfg: configuration object providing C{SetDiskID}
  @param rpc: RPC runner providing C{call_blockdev_getmirrorstatus}
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to C{result.Raise} as its C{prereq} argument
  @rtype: list
  @return: indices (into the RPC payload) of entries whose local disk
      status is C{constants.LDS_FAULTY}

  """
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # entries without a status are skipped
  return [idx for (idx, bdev_status) in enumerate(result.payload)
          if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY]
1102
1103
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @param lu: the logical unit whose opcode (C{lu.op}) is inspected and
      possibly updated
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both slots are set, or if neither is
      set and the cluster has no default iallocator

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      # pass an error code here as well, so that this failure is
      # classified like the other prerequisite errors
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.", errors.ECODE_INVAL)
1134
1135
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The execution step itself does no work; only the hooks environment and
  node lists are provided.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with C{OP_TARGET} set to the cluster name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: an empty list and a list holding only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always True

    """
    return True
1162
1163
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with C{OP_TARGET} set to the cluster name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. the master is the
    only node left and there are no instances.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if not instancelist:
      return
    raise errors.OpPrereqError("There are still %d instance(s) in"
                               " this cluster." % len(instancelist),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1219
1220
1221 def _VerifyCertificate(filename):
1222   """Verifies a certificate for LUClusterVerify.
1223
1224   @type filename: string
1225   @param filename: Path to PEM file
1226
1227   """
1228   try:
1229     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1230                                            utils.ReadFile(filename))
1231   except Exception, err: # pylint: disable-msg=W0703
1232     return (LUClusterVerify.ETYPE_ERROR,
1233             "Failed to load X509 certificate %s: %s" % (filename, err))
1234
1235   (errcode, msg) = \
1236     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1237                                 constants.SSL_CERT_EXPIRATION_ERROR)
1238
1239   if msg:
1240     fnamemsg = "While verifying %s: %s" % (filename, msg)
1241   else:
1242     fnamemsg = None
1243
1244   if errcode is None:
1245     return (None, fnamemsg)
1246   elif errcode == utils.CERT_WARNING:
1247     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1248   elif errcode == utils.CERT_ERROR:
1249     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1250
1251   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1252
1253
class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  Problems found are reported through L{_Error}/L{_ErrorIf}, using the
  error constants defined below.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  # item types, used as the first element of the error constants below
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error constants: (item type, error code text) pairs, unpacked in
  # this order by _Error
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  # ETYPE_FIELD is the keyword argument name through which callers of
  # _Error/_ErrorIf select the severity; ETYPE_ERROR is the default
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  # matches the start of every line (multi-line mode); presumably used to
  # indent hook output -- the usage is not visible in this part of the file
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1298
1299   class NodeImage(object):
1300     """A class representing the logical and physical status of a node.
1301
1302     @type name: string
1303     @ivar name: the node name to which this object refers
1304     @ivar volumes: a structure as returned from
1305         L{ganeti.backend.GetVolumeList} (runtime)
1306     @ivar instances: a list of running instances (runtime)
1307     @ivar pinst: list of configured primary instances (config)
1308     @ivar sinst: list of configured secondary instances (config)
1309     @ivar sbp: dictionary of {primary-node: list of instances} for all
1310         instances for which this node is secondary (config)
1311     @ivar mfree: free memory, as reported by hypervisor (runtime)
1312     @ivar dfree: free disk, as reported by the node (runtime)
1313     @ivar offline: the offline status (config)
1314     @type rpc_fail: boolean
1315     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1316         not whether the individual keys were correct) (runtime)
1317     @type lvm_fail: boolean
1318     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1319     @type hyp_fail: boolean
1320     @ivar hyp_fail: whether the RPC call didn't return the instance list
1321     @type ghost: boolean
1322     @ivar ghost: whether this is a known node or not (config)
1323     @type os_fail: boolean
1324     @ivar os_fail: whether the RPC call didn't return valid OS data
1325     @type oslist: list
1326     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1327     @type vm_capable: boolean
1328     @ivar vm_capable: whether the node can host instances
1329
1330     """
1331     def __init__(self, offline=False, name=None, vm_capable=True):
1332       self.name = name
1333       self.volumes = {}
1334       self.instances = []
1335       self.pinst = []
1336       self.sinst = []
1337       self.sbp = {}
1338       self.mfree = 0
1339       self.dfree = 0
1340       self.offline = offline
1341       self.vm_capable = vm_capable
1342       self.rpc_fail = False
1343       self.lvm_fail = False
1344       self.hyp_fail = False
1345       self.ghost = False
1346       self.os_fail = False
1347       self.oslist = {}
1348
1349   def ExpandNames(self):
1350     self.needed_locks = {
1351       locking.LEVEL_NODE: locking.ALL_SET,
1352       locking.LEVEL_INSTANCE: locking.ALL_SET,
1353     }
1354     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1355
1356   def _Error(self, ecode, item, msg, *args, **kwargs):
1357     """Format an error message.
1358
1359     Based on the opcode's error_codes parameter, either format a
1360     parseable error code, or a simpler error string.
1361
1362     This must be called only from Exec and functions called from Exec.
1363
1364     """
1365     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1366     itype, etxt = ecode
1367     # first complete the msg
1368     if args:
1369       msg = msg % args
1370     # then format the whole message
1371     if self.op.error_codes:
1372       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1373     else:
1374       if item:
1375         item = " " + item
1376       else:
1377         item = ""
1378       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1379     # and finally report it via the feedback_fn
1380     self._feedback_fn("  - %s" % msg)
1381
1382   def _ErrorIf(self, cond, *args, **kwargs):
1383     """Log an error message if the passed condition is True.
1384
1385     """
1386     cond = bool(cond) or self.op.debug_simulate_errors
1387     if cond:
1388       self._Error(*args, **kwargs)
1389     # do not mark the operation as failed for WARN cases only
1390     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1391       self.bad = self.bad or cond
1392
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version
      - report hypervisor, hypervisor parameter and node setup failures
        contained in the result

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version; the node must return a two-element
    # (protocol version, release version) sequence
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    # a protocol mismatch is fatal for this node: nothing else is checked
    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version; a mismatch here is only a warning
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # per-hypervisor verification results; any non-None value is reported
    # as a failure
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # hypervisor parameter verification; every entry in the list is
    # reported as a failure
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    # node setup problems: a non-empty list of messages is an error, and
    # a missing key is replaced by a placeholder message
    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
1465
1466   def _VerifyNodeTime(self, ninfo, nresult,
1467                       nvinfo_starttime, nvinfo_endtime):
1468     """Check the node time.
1469
1470     @type ninfo: L{objects.Node}
1471     @param ninfo: the node to check
1472     @param nresult: the remote results for the node
1473     @param nvinfo_starttime: the start time of the RPC call
1474     @param nvinfo_endtime: the end time of the RPC call
1475
1476     """
1477     node = ninfo.name
1478     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1479
1480     ntime = nresult.get(constants.NV_TIME, None)
1481     try:
1482       ntime_merged = utils.MergeTime(ntime)
1483     except (ValueError, TypeError):
1484       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1485       return
1486
1487     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1488       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1489     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1490       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1491     else:
1492       ntime_diff = None
1493
1494     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1495              "Node time diverges by at least %s from master node time",
1496              ntime_diff)
1497
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    Verifies the volume group list and the physical volume names
    returned by the node. Nothing is checked if no volume group is
    configured (C{vg_name} is None).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and minimum size (constants.MIN_VG_SIZE)
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      # a true vgstatus is used as the error message itself
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
1534
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    Reports missing or failed ssh connectivity data, tcp connectivity
    data and master IP reachability data in the node's results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # ssh connectivity: each entry maps another node to a failure message
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    # tcp connectivity: failures are reported in sorted node order
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    # master IP reachability: a false value means the IP was not reached;
    # the message differs when the failing node is the master itself
    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
1576
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node. It also checks that the instance
    runs on its primary node if it should be up, that it is not running
    on any other node, and that the status of its disks could be
    retrieved and is not faulty.

    @param instance: the instance identifier, as found in the node
        images' C{instances} lists (presumably the instance name)
    @param instanceconfig: the configuration object of the instance
    @param node_image: dictionary of node name to node image object
    @param diskstatus: dictionary of node name to a list of
        (success, status) pairs, one per disk

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    # filled in by MapLVsByNode; iterated as node -> volumes below
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      # an offline primary node is not reported as "instance down"
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    # the instance must not be running on any node but its primary
    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    # flatten the per-node disk status into (node, success, status, index)
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      # failed status retrieval only matters for instances that should be
      # up, and is not reported for ghost or offline nodes
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
1631
1632   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1633     """Verify if there are any unknown volumes in the cluster.
1634
1635     The .os, .swap and backup volumes are ignored. All other volumes are
1636     reported as unknown.
1637
1638     @type reserved: L{ganeti.utils.FieldSet}
1639     @param reserved: a FieldSet of reserved volume names
1640
1641     """
1642     for node, n_img in node_image.items():
1643       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1644         # skip non-healthy nodes
1645         continue
1646       for volume in n_img.volumes:
1647         test = ((node not in node_vol_should or
1648                 volume not in node_vol_should[node]) and
1649                 not reserved.Matches(volume))
1650         self._ErrorIf(test, self.ENODEORPHANLV, node,
1651                       "volume %s is unknown", volume)
1652
1653   def _VerifyOrphanInstances(self, instancelist, node_image):
1654     """Verify the list of running instances.
1655
1656     This checks what instances are running but unknown to the cluster.
1657
1658     """
1659     for node, n_img in node_image.items():
1660       for o_inst in n_img.instances:
1661         test = o_inst not in instancelist
1662         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1663                       "instance %s on node %s should not exist", o_inst, node)
1664
1665   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1666     """Verify N+1 Memory Resilience.
1667
1668     Check that if one single node dies we can still start all the
1669     instances it was primary for.
1670
1671     """
1672     cluster_info = self.cfg.GetClusterInfo()
1673     for node, n_img in node_image.items():
1674       # This code checks that every node which is now listed as
1675       # secondary has enough memory to host all instances it is
1676       # supposed to should a single other node in the cluster fail.
1677       # FIXME: not ready for failover to an arbitrary node
1678       # FIXME: does not support file-backed instances
1679       # WARNING: we currently take into account down instances as well
1680       # as up ones, considering that even if they're down someone
1681       # might want to start them even in the event of a node failure.
1682       if n_img.offline:
1683         # we're skipping offline nodes from the N+1 warning, since
1684         # most likely we don't have good memory infromation from them;
1685         # we already list instances living on such nodes, and that's
1686         # enough warning
1687         continue
1688       for prinode, instances in n_img.sbp.items():
1689         needed_mem = 0
1690         for instance in instances:
1691           bep = cluster_info.FillBE(instance_cfg[instance])
1692           if bep[constants.BE_AUTO_BALANCE]:
1693             needed_mem += bep[constants.BE_MEMORY]
1694         test = n_img.mfree < needed_mem
1695         self._ErrorIf(test, self.ENODEN1, node,
1696                       "not enough memory to accomodate instance failovers"
1697                       " should node %s fail", prinode)
1698
1699   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1700                        master_files):
1701     """Verifies and computes the node required file checksums.
1702
1703     @type ninfo: L{objects.Node}
1704     @param ninfo: the node to check
1705     @param nresult: the remote results for the node
1706     @param file_list: required list of files
1707     @param local_cksum: dictionary of local files and their checksums
1708     @param master_files: list of files that only masters should have
1709
1710     """
1711     node = ninfo.name
1712     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1713
1714     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1715     test = not isinstance(remote_cksum, dict)
1716     _ErrorIf(test, self.ENODEFILECHECK, node,
1717              "node hasn't returned file checksum data")
1718     if test:
1719       return
1720
1721     for file_name in file_list:
1722       node_is_mc = ninfo.master_candidate
1723       must_have = (file_name not in master_files) or node_is_mc
1724       # missing
1725       test1 = file_name not in remote_cksum
1726       # invalid checksum
1727       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1728       # existing and good
1729       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1730       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1731                "file '%s' missing", file_name)
1732       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1733                "file '%s' has wrong checksum", file_name)
1734       # not candidate and this is not a must-have file
1735       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1736                "file '%s' should not exist on non master"
1737                " candidates (and the file is outdated)", file_name)
1738       # all good, except non-master/non-must have combination
1739       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1740                "file '%s' should not exist"
1741                " on non master candidates", file_name)
1742
1743   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1744                       drbd_map):
1745     """Verifies and the node DRBD status.
1746
1747     @type ninfo: L{objects.Node}
1748     @param ninfo: the node to check
1749     @param nresult: the remote results for the node
1750     @param instanceinfo: the dict of instances
1751     @param drbd_helper: the configured DRBD usermode helper
1752     @param drbd_map: the DRBD map as returned by
1753         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1754
1755     """
1756     node = ninfo.name
1757     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1758
1759     if drbd_helper:
1760       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1761       test = (helper_result == None)
1762       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1763                "no drbd usermode helper returned")
1764       if helper_result:
1765         status, payload = helper_result
1766         test = not status
1767         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1768                  "drbd usermode helper check unsuccessful: %s", payload)
1769         test = status and (payload != drbd_helper)
1770         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1771                  "wrong drbd usermode helper: %s", payload)
1772
1773     # compute the DRBD minors
1774     node_drbd = {}
1775     for minor, instance in drbd_map[node].items():
1776       test = instance not in instanceinfo
1777       _ErrorIf(test, self.ECLUSTERCFG, None,
1778                "ghost instance '%s' in temporary DRBD map", instance)
1779         # ghost instance should not be running, but otherwise we
1780         # don't give double warnings (both ghost instance and
1781         # unallocated minor in use)
1782       if test:
1783         node_drbd[minor] = (instance, False)
1784       else:
1785         instance = instanceinfo[instance]
1786         node_drbd[minor] = (instance.name, instance.admin_up)
1787
1788     # and now check them
1789     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1790     test = not isinstance(used_minors, (tuple, list))
1791     _ErrorIf(test, self.ENODEDRBD, node,
1792              "cannot parse drbd status file: %s", str(used_minors))
1793     if test:
1794       # we cannot check drbd status
1795       return
1796
1797     for minor, (iname, must_exist) in node_drbd.items():
1798       test = minor not in used_minors and must_exist
1799       _ErrorIf(test, self.ENODEDRBD, node,
1800                "drbd minor %d of instance %s is not active", minor, iname)
1801     for minor in used_minors:
1802       test = minor not in node_drbd
1803       _ErrorIf(test, self.ENODEDRBD, node,
1804                "unallocated drbd minor %d is in use", minor)
1805
1806   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1807     """Builds the node OS structures.
1808
1809     @type ninfo: L{objects.Node}
1810     @param ninfo: the node to check
1811     @param nresult: the remote results for the node
1812     @param nimg: the node image object
1813
1814     """
1815     node = ninfo.name
1816     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1817
1818     remote_os = nresult.get(constants.NV_OSLIST, None)
1819     test = (not isinstance(remote_os, list) or
1820             not compat.all(isinstance(v, list) and len(v) == 7
1821                            for v in remote_os))
1822
1823     _ErrorIf(test, self.ENODEOS, node,
1824              "node hasn't returned valid OS data")
1825
1826     nimg.os_fail = test
1827
1828     if test:
1829       return
1830
1831     os_dict = {}
1832
1833     for (name, os_path, status, diagnose,
1834          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1835
1836       if name not in os_dict:
1837         os_dict[name] = []
1838
1839       # parameters is a list of lists instead of list of tuples due to
1840       # JSON lacking a real tuple type, fix it:
1841       parameters = [tuple(v) for v in parameters]
1842       os_dict[name].append((os_path, status, diagnose,
1843                             set(variants), set(parameters), set(api_ver)))
1844
1845     nimg.oslist = os_dict
1846
1847   def _VerifyNodeOS(self, ninfo, nimg, base):
1848     """Verifies the node OS list.
1849
1850     @type ninfo: L{objects.Node}
1851     @param ninfo: the node to check
1852     @param nimg: the node image object
1853     @param base: the 'template' node we match against (e.g. from the master)
1854
1855     """
1856     node = ninfo.name
1857     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1858
1859     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1860
1861     for os_name, os_data in nimg.oslist.items():
1862       assert os_data, "Empty OS status for OS %s?!" % os_name
1863       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1864       _ErrorIf(not f_status, self.ENODEOS, node,
1865                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1866       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1867                "OS '%s' has multiple entries (first one shadows the rest): %s",
1868                os_name, utils.CommaJoin([v[0] for v in os_data]))
1869       # this will catched in backend too
1870       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1871                and not f_var, self.ENODEOS, node,
1872                "OS %s with API at least %d does not declare any variant",
1873                os_name, constants.OS_API_V15)
1874       # comparisons with the 'base' image
1875       test = os_name not in base.oslist
1876       _ErrorIf(test, self.ENODEOS, node,
1877                "Extra OS %s not present on reference node (%s)",
1878                os_name, base.name)
1879       if test:
1880         continue
1881       assert base.oslist[os_name], "Base node has empty OS status?"
1882       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1883       if not b_status:
1884         # base OS is invalid, skipping
1885         continue
1886       for kind, a, b in [("API version", f_api, b_api),
1887                          ("variants list", f_var, b_var),
1888                          ("parameters", f_param, b_param)]:
1889         _ErrorIf(a != b, self.ENODEOS, node,
1890                  "OS %s %s differs from reference node %s: %s vs. %s",
1891                  kind, os_name, base.name,
1892                  utils.CommaJoin(a), utils.CommaJoin(b))
1893
1894     # check any missing OSes
1895     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1896     _ErrorIf(missing, self.ENODEOS, node,
1897              "OSes present on reference node %s but missing on this node: %s",
1898              base.name, utils.CommaJoin(missing))
1899
1900   def _VerifyOob(self, ninfo, nresult):
1901     """Verifies out of band functionality of a node.
1902
1903     @type ninfo: L{objects.Node}
1904     @param ninfo: the node to check
1905     @param nresult: the remote results for the node
1906
1907     """
1908     node = ninfo.name
1909     # We just have to verify the paths on master and/or master candidates
1910     # as the oob helper is invoked on the master
1911     if ((ninfo.master_candidate or ninfo.master_capable) and
1912         constants.NV_OOB_PATHS in nresult):
1913       for path_result in nresult[constants.NV_OOB_PATHS]:
1914         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1915
1916   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1917     """Verifies and updates the node volume data.
1918
1919     This function will update a L{NodeImage}'s internal structures
1920     with data from the remote call.
1921
1922     @type ninfo: L{objects.Node}
1923     @param ninfo: the node to check
1924     @param nresult: the remote results for the node
1925     @param nimg: the node image object
1926     @param vg_name: the configured VG name
1927
1928     """
1929     node = ninfo.name
1930     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1931
1932     nimg.lvm_fail = True
1933     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1934     if vg_name is None:
1935       pass
1936     elif isinstance(lvdata, basestring):
1937       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1938                utils.SafeEncode(lvdata))
1939     elif not isinstance(lvdata, dict):
1940       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1941     else:
1942       nimg.volumes = lvdata
1943       nimg.lvm_fail = False
1944
1945   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1946     """Verifies and updates the node instance list.
1947
1948     If the listing was successful, then updates this node's instance
1949     list. Otherwise, it marks the RPC call as failed for the instance
1950     list key.
1951
1952     @type ninfo: L{objects.Node}
1953     @param ninfo: the node to check
1954     @param nresult: the remote results for the node
1955     @param nimg: the node image object
1956
1957     """
1958     idata = nresult.get(constants.NV_INSTANCELIST, None)
1959     test = not isinstance(idata, list)
1960     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1961                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1962     if test:
1963       nimg.hyp_fail = True
1964     else:
1965       nimg.instances = idata
1966
1967   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1968     """Verifies and computes a node information map
1969
1970     @type ninfo: L{objects.Node}
1971     @param ninfo: the node to check
1972     @param nresult: the remote results for the node
1973     @param nimg: the node image object
1974     @param vg_name: the configured VG name
1975
1976     """
1977     node = ninfo.name
1978     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1979
1980     # try to read free memory (from the hypervisor)
1981     hv_info = nresult.get(constants.NV_HVINFO, None)
1982     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1983     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1984     if not test:
1985       try:
1986         nimg.mfree = int(hv_info["memory_free"])
1987       except (ValueError, TypeError):
1988         _ErrorIf(True, self.ENODERPC, node,
1989                  "node returned invalid nodeinfo, check hypervisor")
1990
1991     # FIXME: devise a free space model for file based instances as well
1992     if vg_name is not None:
1993       test = (constants.NV_VGLIST not in nresult or
1994               vg_name not in nresult[constants.NV_VGLIST])
1995       _ErrorIf(test, self.ENODELVM, node,
1996                "node didn't return data for the volume group '%s'"
1997                " - it is either missing or broken", vg_name)
1998       if not test:
1999         try:
2000           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2001         except (ValueError, TypeError):
2002           _ErrorIf(True, self.ENODERPC, node,
2003                    "node returned invalid LVM info, check LVM status")
2004
2005   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2006     """Gets per-disk status information for all instances.
2007
2008     @type nodelist: list of strings
2009     @param nodelist: Node names
2010     @type node_image: dict of (name, L{objects.Node})
2011     @param node_image: Node objects
2012     @type instanceinfo: dict of (name, L{objects.Instance})
2013     @param instanceinfo: Instance objects
2014     @rtype: {instance: {node: [(succes, payload)]}}
2015     @return: a dictionary of per-instance dictionaries with nodes as
2016         keys and disk information as values; the disk information is a
2017         list of tuples (success, payload)
2018
2019     """
2020     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2021
2022     node_disks = {}
2023     node_disks_devonly = {}
2024     diskless_instances = set()
2025     diskless = constants.DT_DISKLESS
2026
2027     for nname in nodelist:
2028       node_instances = list(itertools.chain(node_image[nname].pinst,
2029                                             node_image[nname].sinst))
2030       diskless_instances.update(inst for inst in node_instances
2031                                 if instanceinfo[inst].disk_template == diskless)
2032       disks = [(inst, disk)
2033                for inst in node_instances
2034                for disk in instanceinfo[inst].disks]
2035
2036       if not disks:
2037         # No need to collect data
2038         continue
2039
2040       node_disks[nname] = disks
2041
2042       # Creating copies as SetDiskID below will modify the objects and that can
2043       # lead to incorrect data returned from nodes
2044       devonly = [dev.Copy() for (_, dev) in disks]
2045
2046       for dev in devonly:
2047         self.cfg.SetDiskID(dev, nname)
2048
2049       node_disks_devonly[nname] = devonly
2050
2051     assert len(node_disks) == len(node_disks_devonly)
2052
2053     # Collect data from all nodes with disks
2054     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2055                                                           node_disks_devonly)
2056
2057     assert len(result) == len(node_disks)
2058
2059     instdisk = {}
2060
2061     for (nname, nres) in result.items():
2062       disks = node_disks[nname]
2063
2064       if nres.offline:
2065         # No data from this node
2066         data = len(disks) * [(False, "node offline")]
2067       else:
2068         msg = nres.fail_msg
2069         _ErrorIf(msg, self.ENODERPC, nname,
2070                  "while getting disk information: %s", msg)
2071         if msg:
2072           # No data from this node
2073           data = len(disks) * [(False, msg)]
2074         else:
2075           data = []
2076           for idx, i in enumerate(nres.payload):
2077             if isinstance(i, (tuple, list)) and len(i) == 2:
2078               data.append(i)
2079             else:
2080               logging.warning("Invalid result from node %s, entry %d: %s",
2081                               nname, idx, i)
2082               data.append((False, "Invalid result from the remote node"))
2083
2084       for ((inst, _), status) in zip(disks, data):
2085         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2086
2087     # Add empty entries for diskless instances.
2088     for inst in diskless_instances:
2089       assert inst not in instdisk
2090       instdisk[inst] = {}
2091
2092     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2093                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2094                       compat.all(isinstance(s, (tuple, list)) and
2095                                  len(s) == 2 for s in statuses)
2096                       for inst, nnames in instdisk.items()
2097                       for nname, statuses in nnames.items())
2098     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2099
2100     return instdisk
2101
2102   def _VerifyHVP(self, hvp_data):
2103     """Verifies locally the syntax of the hypervisor parameters.
2104
2105     """
2106     for item, hv_name, hv_params in hvp_data:
2107       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2108              (item, hv_name))
2109       try:
2110         hv_class = hypervisor.GetHypervisor(hv_name)
2111         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2112         hv_class.CheckParameterSyntax(hv_params)
2113       except errors.GenericError, err:
2114         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2115
2116   def BuildHooksEnv(self):
2117     """Build hooks env.
2118
2119     Cluster-Verify hooks just ran in the post phase and their failure makes
2120     the output be logged in the verify output and the verification to fail.
2121
2122     """
2123     cfg = self.cfg
2124
2125     env = {
2126       "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2127       }
2128
2129     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2130                for node in cfg.GetAllNodesInfo().values())
2131
2132     return env
2133
2134   def BuildHooksNodes(self):
2135     """Build hooks nodes.
2136
2137     """
2138     return ([], self.cfg.GetNodeList())
2139
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The verification checks the configuration and the cluster
    certificates, validates the hypervisor parameters locally, queries
    all nodes with a single node verify RPC call and then runs the
    per-node and per-instance checks, followed by the orphan volume,
    orphan instance and (unless skipped) N+1 memory checks.

    @param feedback_fn: function used to send progress messages and
        notices back to the caller
    @return: the negation of C{self.bad}, which is reset to C{False} when
        the verification starts (presumably the error reporting helpers
        set it when a problem is flagged; they are not defined here)

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    # Snapshot of the configuration used throughout the verification
    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    # Compute the set of hypervisor parameters
    # (each entry is a (source, hypervisor name, parameters) tuple)
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # Parameters of the node verify RPC call, keyed by check name
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS: hvp_data,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    # The LVM and DRBD checks are only requested when a volume group is
    # configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    # Gather OOB paths
    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    # Record, for every node, the instances it is primary and secondary
    # for; nodes referenced by an instance but missing from the node list
    # get a placeholder image flagged as ghost
    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        # sbp: instances this node is secondary for, grouped by primary
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    # The first node returning valid OS data becomes the reference the
    # other nodes' OS lists are compared with
    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      # Without an answer from the node none of the checks below can run
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      self._VerifyOob(node_i, nresult)

      # Storage, hypervisor and OS data is only looked at for vm_capable
      # nodes
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      # For disk templates in DTS_INT_MIRROR, warn when the instance's
      # nodes are spread over more than one node group
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(nodeinfo_byname[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # Informational notices only; these do not go through _ErrorIf
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2451
2452   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2453     """Analyze the post-hooks' result
2454
2455     This method analyses the hook result, handles it, and sends some
2456     nicely-formatted feedback back to the user.
2457
2458     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2459         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2460     @param hooks_results: the results of the multi-node hooks rpc call
2461     @param feedback_fn: function used send feedback back to the caller
2462     @param lu_result: previous Exec result
2463     @return: the new Exec result, based on the previous result
2464         and hook results
2465
2466     """
2467     # We only really run POST phase hooks, and are only interested in
2468     # their results
2469     if phase == constants.HOOKS_PHASE_POST:
2470       # Used to change hooks' output to proper indentation
2471       feedback_fn("* Hooks Results")
2472       assert hooks_results, "invalid result from hooks"
2473
2474       for node_name in hooks_results:
2475         res = hooks_results[node_name]
2476         msg = res.fail_msg
2477         test = msg and not res.offline
2478         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2479                       "Communication failure in hooks execution: %s", msg)
2480         if res.offline or msg:
2481           # No need to investigate payload if node is offline or gave an error.
2482           # override manually lu_result here as _ErrorIf only
2483           # overrides self.bad
2484           lu_result = 1
2485           continue
2486         for script, hkr, output in res.payload:
2487           test = hkr == constants.HKR_FAIL
2488           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2489                         "Script %s failed, output:", script)
2490           if test:
2491             output = self._HOOKS_INDENT_RE.sub('      ', output)
2492             feedback_fn("%s" % output)
2493             lu_result = 0
2494
2495       return lu_result
2496
2497
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Ask for all node and instance locks.

    The share flag is set to 1 for every locking level.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only the logical volumes of instances with C{admin_up} set are
    taken into account, and only vm-capable nodes are queried.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    # the containers above are filled in place further down
    result = (res_nodes, res_instances, res_missing)

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    all_instances = self.cfg.GetAllInstancesInfo().values()

    # transform { iname: {node: [vol,],},} to {(node, vol): iname}
    nv_dict = {}
    for inst in all_instances:
      if not inst.admin_up:
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      for node, vol_list in lvs_by_node.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      # no volumes expected anywhere, no need to contact the nodes
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every volume reported by the node is dropped from the map of
        # expected volumes, whatever its state
        inst = nv_dict.pop((node, lv_name), None)
        if inst is None or lv_online:
          continue
        if inst.name not in res_instances:
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2564
2565
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the needed locks.

    If instances were given in the opcode, only these (and, later on,
    their primary nodes) are locked; otherwise all nodes and all
    instances are requested.

    """
    if self.op.instances:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in self.op.instances]
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level != locking.LEVEL_NODE or self.wanted_names is None:
      return
    self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    names = self.wanted_names
    if names is None:
      # no explicit list given, work on whatever instances got locked
      names = self.acquired_locks[locking.LEVEL_INSTANCE]
      self.wanted_names = names

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of any child was changed

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @return: a list of (instance name, disk index, size) tuples, one
        for every correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # the node is queried with copies of the disks; SetDiskID is
      # applied to the copies, not to the disks kept in the instances
      queried = [entry[2].Copy() for entry in dskl]
      for dcopy in queried:
        self.cfg.SetDiskID(dcopy, node)
      result = self.rpc.call_blockdev_getsize(node, queried)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      sizes = result.payload
      if len(dskl) != len(sizes):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for (instance, idx, disk), size in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # scale down by 2**20 before comparing with the recorded size
        # (presumably bytes to mebibytes - to be confirmed)
        actual = size >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2681
2682
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the current cluster name and the requested
    new name.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a tuple of (list with the master node only, list of all
        nodes)

    """
    master = self.cfg.GetMasterNode()
    return ([master], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, and the operation is refused if neither the
    name nor the IP address would change, or if a new IP address
    already answers on the network. On success C{self.op.name} holds
    the resolved name and C{self.ip} the resolved address.

    """
    resolved = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = resolved.name
    new_ip = resolved.ip
    self.ip = new_ip

    name_changed = new_name != self.cfg.GetClusterName()
    ip_changed = new_ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new address must not be in use yet
    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @return: the new cluster name

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_stop_master(master, False).Raise(
      "Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # the file is then uploaded to all online nodes but the master
      node_list = self.cfg.GetOnlineNodeList()
      if master in node_list:
        node_list.remove(master)
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # whatever happened above, try to bring the master role back
      msg = self.rpc.call_node_start_master(master, False, False).fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
2762
2763
2764 class LUClusterSetParams(LogicalUnit):
2765   """Change the parameters of the cluster.
2766
2767   """
2768   HPATH = "cluster-modify"
2769   HTYPE = constants.HTYPE_CLUSTER
2770   REQ_BGL = False
2771
2772   def CheckArguments(self):
2773     """Check parameters
2774
2775     """
2776     if self.op.uid_pool:
2777       uidpool.CheckUidPool(self.op.uid_pool)
2778
2779     if self.op.add_uids:
2780       uidpool.CheckUidPool(self.op.add_uids)
2781
2782     if self.op.remove_uids:
2783       uidpool.CheckUidPool(self.op.remove_uids)
2784
2785   def ExpandNames(self):
2786     # FIXME: in the future maybe other cluster params won't require checking on
2787     # all nodes to be modified.
2788     self.needed_locks = {
2789       locking.LEVEL_NODE: locking.ALL_SET,
2790     }
2791     self.share_locks[locking.LEVEL_NODE] = 1
2792
2793   def BuildHooksEnv(self):
2794     """Build hooks env.
2795
2796     """
2797     return {
2798       "OP_TARGET": self.cfg.GetClusterName(),
2799       "NEW_VG_NAME": self.op.vg_name,
2800       }
2801
2802   def BuildHooksNodes(self):
2803     """Build hooks nodes.
2804
2805     """
2806     mn = self.cfg.GetMasterNode()
2807     return ([mn], [mn])
2808
2809   def CheckPrereq(self):
2810     """Check prerequisites.
2811
2812     This checks whether the given params don't conflict and
2813     if the given volume group is valid.
2814
2815     """
2816     if self.op.vg_name is not None and not self.op.vg_name:
2817       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2818         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2819                                    " instances exist", errors.ECODE_INVAL)
2820
2821     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2822       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2823         raise errors.OpPrereqError("Cannot disable drbd helper while"
2824                                    " drbd-based instances exist",
2825                                    errors.ECODE_INVAL)
2826
2827     node_list = self.acquired_locks[locking.LEVEL_NODE]
2828
2829     # if vg_name not None, checks given volume group on all nodes
2830     if self.op.vg_name:
2831       vglist = self.rpc.call_vg_list(node_list)
2832       for node in node_list:
2833         msg = vglist[node].fail_msg
2834         if msg:
2835           # ignoring down node
2836           self.LogWarning("Error while gathering data on node %s"
2837                           " (ignoring node): %s", node, msg)
2838           continue
2839         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2840                                               self.op.vg_name,
2841                                               constants.MIN_VG_SIZE)
2842         if vgstatus:
2843           raise errors.OpPrereqError("Error on node '%s': %s" %
2844                                      (node, vgstatus), errors.ECODE_ENVIRON)
2845
2846     if self.op.drbd_helper:
2847       # checks given drbd helper on all nodes
2848       helpers = self.rpc.call_drbd_helper(node_list)
2849       for node in node_list:
2850         ninfo = self.cfg.GetNodeInfo(node)
2851         if ninfo.offline:
2852           self.LogInfo("Not checking drbd helper on offline node %s", node)
2853           continue
2854         msg = helpers[node].fail_msg
2855         if msg:
2856           raise errors.OpPrereqError("Error checking drbd helper on node"
2857                                      " '%s': %s" % (node, msg),
2858                                      errors.ECODE_ENVIRON)
2859         node_helper = helpers[node].payload
2860         if node_helper != self.op.drbd_helper:
2861           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2862                                      (node, node_helper), errors.ECODE_ENVIRON)
2863
2864     self.cluster = cluster = self.cfg.GetClusterInfo()
2865     # validate params changes
2866     if self.op.beparams:
2867       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2868       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2869
2870     if self.op.ndparams:
2871       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2872       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2873
2874     if self.op.nicparams:
2875       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2876       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2877       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2878       nic_errors = []
2879
2880       # check all instances for consistency
2881       for instance in self.cfg.GetAllInstancesInfo().values():
2882         for nic_idx, nic in enumerate(instance.nics):
2883           params_copy = copy.deepcopy(nic.nicparams)
2884           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2885
2886           # check parameter syntax
2887           try:
2888             objects.NIC.CheckParameterSyntax(params_filled)
2889           except errors.ConfigurationError, err:
2890             nic_errors.append("Instance %s, nic/%d: %s" %
2891                               (instance.name, nic_idx, err))
2892
2893           # if we're moving instances to routed, check that they have an ip
2894           target_mode = params_filled[constants.NIC_MODE]
2895           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2896             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2897                               (instance.name, nic_idx))
2898       if nic_errors:
2899         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2900                                    "\n".join(nic_errors))
2901
2902     # hypervisor list/parameters
2903     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2904     if self.op.hvparams:
2905       for hv_name, hv_dict in self.op.hvparams.items():
2906         if hv_name not in self.new_hvparams:
2907           self.new_hvparams[hv_name] = hv_dict
2908         else:
2909           self.new_hvparams[hv_name].update(hv_dict)
2910
2911     # os hypervisor parameters
2912     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2913     if self.op.os_hvp:
2914       for os_name, hvs in self.op.os_hvp.items():
2915         if os_name not in self.new_os_hvp:
2916           self.new_os_hvp[os_name] = hvs
2917         else:
2918           for hv_name, hv_dict in hvs.items():
2919             if hv_name not in self.new_os_hvp[os_name]:
2920               self.new_os_hvp[os_name][hv_name] = hv_dict
2921             else:
2922               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2923
2924     # os parameters
2925     self.new_osp = objects.FillDict(cluster.osparams, {})
2926     if self.op.osparams:
2927       for os_name, osp in self.op.osparams.items():
2928         if os_name not in self.new_osp:
2929           self.new_osp[os_name] = {}
2930
2931         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2932                                                   use_none=True)
2933
2934         if not self.new_osp[os_name]:
2935           # we removed all parameters
2936           del self.new_osp[os_name]
2937         else:
2938           # check the parameter validity (remote check)
2939           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2940                          os_name, self.new_osp[os_name])
2941
2942     # changes to the hypervisor list
2943     if self.op.enabled_hypervisors is not None:
2944       self.hv_list = self.op.enabled_hypervisors
2945       for hv in self.hv_list:
2946         # if the hypervisor doesn't already exist in the cluster
2947         # hvparams, we initialize it to empty, and then (in both
2948         # cases) we make sure to fill the defaults, as we might not
2949         # have a complete defaults list if the hypervisor wasn't
2950         # enabled before
2951         if hv not in new_hvp:
2952           new_hvp[hv] = {}
2953         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2954         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2955     else:
2956       self.hv_list = cluster.enabled_hypervisors
2957
2958     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2959       # either the enabled list has changed, or the parameters have, validate
2960       for hv_name, hv_params in self.new_hvparams.items():
2961         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2962             (self.op.enabled_hypervisors and
2963              hv_name in self.op.enabled_hypervisors)):
2964           # either this is a new hypervisor, or its parameters have changed
2965           hv_class = hypervisor.GetHypervisor(hv_name)
2966           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2967           hv_class.CheckParameterSyntax(hv_params)
2968           _CheckHVParams(self, node_list, hv_name, hv_params)
2969
2970     if self.op.os_hvp:
2971       # no need to check any newly-enabled hypervisors, since the
2972       # defaults have already been checked in the above code-block
2973       for os_name, os_hvp in self.new_os_hvp.items():
2974         for hv_name, hv_params in os_hvp.items():
2975           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2976           # we need to fill in the new os_hvp on top of the actual hv_p
2977           cluster_defaults = self.new_hvparams.get(hv_name, {})
2978           new_osp = objects.FillDict(cluster_defaults, hv_params)
2979           hv_class = hypervisor.GetHypervisor(hv_name)
2980           hv_class.CheckParameterSyntax(new_osp)
2981           _CheckHVParams(self, node_list, hv_name, new_osp)
2982
2983     if self.op.default_iallocator:
2984       alloc_script = utils.FindFile(self.op.default_iallocator,
2985                                     constants.IALLOCATOR_SEARCH_PATH,
2986                                     os.path.isfile)
2987       if alloc_script is None:
2988         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2989                                    " specified" % self.op.default_iallocator,
2990                                    errors.ECODE_INVAL)
2991
2992   def Exec(self, feedback_fn):
2993     """Change the parameters of the cluster.
2994
2995     """
2996     if self.op.vg_name is not None:
2997       new_volume = self.op.vg_name
2998       if not new_volume:
2999         new_volume = None
3000       if new_volume != self.cfg.GetVGName():
3001         self.cfg.SetVGName(new_volume)
3002       else:
3003         feedback_fn("Cluster LVM configuration already in desired"
3004                     " state, not changing")
3005     if self.op.drbd_helper is not None:
3006       new_helper = self.op.drbd_helper
3007       if not new_helper:
3008         new_helper = None
3009       if new_helper != self.cfg.GetDRBDHelper():
3010         self.cfg.SetDRBDHelper(new_helper)
3011       else:
3012         feedback_fn("Cluster DRBD helper already in desired state,"
3013                     " not changing")
3014     if self.op.hvparams:
3015       self.cluster.hvparams = self.new_hvparams
3016     if self.op.os_hvp:
3017       self.cluster.os_hvp = self.new_os_hvp
3018     if self.op.enabled_hypervisors is not None:
3019       self.cluster.hvparams = self.new_hvparams
3020       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3021     if self.op.beparams:
3022       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3023     if self.op.nicparams:
3024       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3025     if self.op.osparams:
3026       self.cluster.osparams = self.new_osp
3027     if self.op.ndparams:
3028       self.cluster.ndparams = self.new_ndparams
3029
3030     if self.op.candidate_pool_size is not None:
3031       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3032       # we need to update the pool size here, otherwise the save will fail
3033       _AdjustCandidatePool(self, [])
3034
3035     if self.op.maintain_node_health is not None:
3036       self.cluster.maintain_node_health = self.op.maintain_node_health
3037
3038     if self.op.prealloc_wipe_disks is not None:
3039       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3040
3041     if self.op.add_uids is not None:
3042       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3043
3044     if self.op.remove_uids is not None:
3045       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3046
3047     if self.op.uid_pool is not None:
3048       self.cluster.uid_pool = self.op.uid_pool
3049
3050     if self.op.default_iallocator is not None:
3051       self.cluster.default_iallocator = self.op.default_iallocator
3052
3053     if self.op.reserved_lvs is not None:
3054       self.cluster.reserved_lvs = self.op.reserved_lvs
3055
3056     def helper_os(aname, mods, desc):
3057       desc += " OS list"
3058       lst = getattr(self.cluster, aname)
3059       for key, val in mods:
3060         if key == constants.DDM_ADD:
3061           if val in lst:
3062             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3063           else:
3064             lst.append(val)
3065         elif key == constants.DDM_REMOVE:
3066           if val in lst:
3067             lst.remove(val)
3068           else:
3069             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3070         else:
3071           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3072
3073     if self.op.hidden_os:
3074       helper_os("hidden_os", self.op.hidden_os, "hidden")
3075
3076     if self.op.blacklisted_os:
3077       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3078
3079     if self.op.master_netdev:
3080       master = self.cfg.GetMasterNode()
3081       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3082                   self.cluster.master_netdev)
3083       result = self.rpc.call_node_stop_master(master, False)
3084       result.Raise("Could not disable the master ip")
3085       feedback_fn("Changing master_netdev from %s to %s" %
3086                   (self.cluster.master_netdev, self.op.master_netdev))
3087       self.cluster.master_netdev = self.op.master_netdev
3088
3089     self.cfg.Update(self.cluster, feedback_fn)
3090
3091     if self.op.master_netdev:
3092       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3093                   self.op.master_netdev)
3094       result = self.rpc.call_node_start_master(master, False, False)
3095       if result.fail_msg:
3096         self.LogWarning("Could not re-enable the master ip on"
3097                         " the master, please restart manually: %s",
3098                         result.fail_msg)
3099
3100
def _UploadHelper(lu, nodes, fname):
  """Upload a file to the given nodes, warning about each failed copy.

  Nothing is done if the file does not exist locally.

  @param lu: calling logical unit; its C{rpc} and C{proc} attributes
      are used
  @param nodes: the nodes the file should be uploaded to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for node_name, node_result in upload_results.items():
    failure = node_result.fail_msg
    if not failure:
      continue
    # a failed copy is only reported, it does not abort the upload
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, failure))
3113
3114
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  The general files go to all online nodes, the files reported by the
  enabled hypervisors only to the vm-capable ones; the master node is
  left out of both sets.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  master_name = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()).name
  all_targets = lu.cfg.GetOnlineNodeList()
  non_vm_capable = lu.cfg.GetNonVmCapableNodeList()
  vm_targets = [name for name in all_targets if name not in non_vm_capable]

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  # the master is the source of the files, do not upload to it
  for targets in (all_targets, vm_targets):
    if master_name in targets:
      targets.remove(master_name)

  # 2. Gather files to distribute
  general_files = set([constants.ETC_HOSTS,
                       constants.SSH_KNOWN_HOSTS_FILE,
                       constants.RAPI_CERT_FILE,
                       constants.RAPI_USERS_FILE,
                       constants.CONFD_HMAC_KEY,
                       constants.CLUSTER_DOMAIN_SECRET_FILE,
                      ])

  hv_files = set()
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    hv_files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in general_files:
    _UploadHelper(lu, all_targets, fname)
  for fname in hv_files:
    _UploadHelper(lu, vm_targets, fname)
3162
3163
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Ask for the locks of all nodes, with the node share flag set to 1.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The cluster object is written back unchanged through the
    configuration update call, after which the ancillary files are
    uploaded.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
3184
3185
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the primary node of the
  instance until no disk reports a sync in progress anymore (or just
  once, if C{oneshot} is set). If disks are found degraded without
  being in resync, the check is repeated a few times at one second
  intervals, in order not to report a transient state.

  @param lu: the calling logical unit
  @param instance: the instance whose disks should be checked
  @param disks: the disks to check, passed through
      L{_ExpandCheckDisks}; an empty (but not C{None}) value means
      there is nothing to wait for
  @param oneshot: if true, do not wait for the sync to finish, only
      report the current state
  @rtype: boolean
  @return: C{False} if the last poll found a disk which is degraded
      without being in resync, C{True} otherwise
  @raise errors.RemoteError: if the node failed to answer ten times
      in a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # the node answered, start counting failures from scratch
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      # degraded disks which are resyncing are not counted here
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # keep the longest estimate over all disks, so that the sleep
          # below does not depend on the order of the disks
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the longest estimate, but poll at least once a minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
3259
3260
3261 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3262   """Check that mirrors are not degraded.
3263
3264   The ldisk parameter, if True, will change the test from the
3265   is_degraded attribute (which represents overall non-ok status for
3266   the device(s)) to the ldisk (representing the local storage status).
3267
3268   """
3269   lu.cfg.SetDiskID(dev, node)
3270
3271   result = True
3272
3273   if on_primary or dev.AssembleOnSecondary():
3274     rstats = lu.rpc.call_blockdev_find(node, dev)
3275     msg = rstats.fail_msg
3276     if msg:
3277       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3278       result = False
3279     elif not rstats.payload:
3280       lu.LogWarning("Can't find disk on node %s", node)
3281       result = False
3282     else:
3283       if ldisk:
3284         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3285       else:
3286         result = result and not rstats.payload.is_degraded
3287
3288   if dev.children:
3289     for child in dev.children:
3290       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3291
3292   return result
3293
3294
3295 class LUOobCommand(NoHooksLU):
3296   """Logical unit for OOB handling.
3297
3298   """
3299   REG_BGL = False
3300   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3301
3302   def CheckPrereq(self):
3303     """Check prerequisites.
3304
3305     This checks:
3306      - the node exists in the configuration
3307      - OOB is supported
3308
3309     Any errors are signaled by raising errors.OpPrereqError.
3310
3311     """
3312     self.nodes = []
3313     self.master_node = self.cfg.GetMasterNode()
3314
3315     assert self.op.power_delay >= 0.0
3316
3317     if self.op.node_names:
3318       if self.op.command in self._SKIP_MASTER:
3319         if self.master_node in self.op.node_names:
3320           master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3321           master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3322
3323           if master_oob_handler:
3324             additional_text = ("Run '%s %s %s' if you want to operate on the"
3325                                " master regardless") % (master_oob_handler,
3326                                                         self.op.command,
3327                                                         self.master_node)
3328           else:
3329             additional_text = "The master node does not support out-of-band"
3330
3331           raise errors.OpPrereqError(("Operating on the master node %s is not"
3332                                       " allowed for %s\n%s") %
3333                                      (self.master_node, self.op.command,
3334                                       additional_text), errors.ECODE_INVAL)
3335     else:
3336       self.op.node_names = self.cfg.GetNodeList()
3337       if self.op.command in self._SKIP_MASTER:
3338         self.op.node_names.remove(self.master_node)
3339
3340     if self.op.command in self._SKIP_MASTER:
3341       assert self.master_node not in self.op.node_names
3342
3343     for node_name in self.op.node_names:
3344       node = self.cfg.GetNodeInfo(node_name)
3345
3346       if node is None:
3347         raise errors.OpPrereqError("Node %s not found" % node_name,
3348                                    errors.ECODE_NOENT)
3349       else:
3350         self.nodes.append(node)
3351
3352       if (not self.op.ignore_status and
3353           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3354         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3355                                     " not marked offline") % node_name,
3356                                    errors.ECODE_STATE)
3357
3358   def ExpandNames(self):
3359     """Gather locks we need.
3360
3361     """
3362     if self.op.node_names:
3363       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3364                             for name in self.op.node_names]
3365       lock_names = self.op.node_names
3366     else:
3367       lock_names = locking.ALL_SET
3368
3369     self.needed_locks = {
3370       locking.LEVEL_NODE: lock_names,
3371       }
3372
3373   def Exec(self, feedback_fn):
3374     """Execute OOB and return result if we expect any.
3375
3376     """
3377     master_node = self.master_node
3378     ret = []
3379
3380     for idx, node in enumerate(self.nodes):
3381       node_entry = [(constants.RS_NORMAL, node.name)]
3382       ret.append(node_entry)
3383
3384       oob_program = _SupportsOob(self.cfg, node)
3385
3386       if not oob_program:
3387         node_entry.append((constants.RS_UNAVAIL, None))
3388         continue
3389
3390       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3391                    self.op.command, oob_program, node.name)
3392       result = self.rpc.call_run_oob(master_node, oob_program,
3393                                      self.op.command, node.name,
3394                                      self.op.timeout)
3395
3396       if result.fail_msg:
3397         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3398                         node.name, result.fail_msg)
3399         node_entry.append((constants.RS_NODATA, None))
3400       else:
3401         try:
3402           self._CheckPayload(result)
3403         except errors.OpExecError, err:
3404           self.LogWarning("The payload returned by '%s' is not valid: %s",
3405                           node.name, err)
3406           node_entry.append((constants.RS_NODATA, None))
3407         else:
3408           if self.op.command == constants.OOB_HEALTH:
3409             # For health we should log important events
3410             for item, status in result.payload:
3411               if status in [constants.OOB_STATUS_WARNING,
3412                             constants.OOB_STATUS_CRITICAL]:
3413                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3414                                 node.name, item, status)
3415
3416           if self.op.command == constants.OOB_POWER_ON:
3417             node.powered = True
3418           elif self.op.command == constants.OOB_POWER_OFF:
3419             node.powered = False
3420           elif self.op.command == constants.OOB_POWER_STATUS:
3421             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3422             if powered != node.powered:
3423               logging.warning(("Recorded power state (%s) of node '%s' does not"
3424                                " match actual power state (%s)"), node.powered,
3425                               node.name, powered)
3426
3427           # For configuration changing commands we should update the node
3428           if self.op.command in (constants.OOB_POWER_ON,
3429                                  constants.OOB_POWER_OFF):
3430             self.cfg.Update(node, feedback_fn)
3431
3432           node_entry.append((constants.RS_NORMAL, result.payload))
3433
3434           if (self.op.command == constants.OOB_POWER_ON and
3435               idx < len(self.nodes) - 1):
3436             time.sleep(self.op.power_delay)
3437
3438     return ret
3439
3440   def _CheckPayload(self, result):
3441     """Checks if the payload is valid.
3442
3443     @param result: RPC result
3444     @raises errors.OpExecError: If payload is not valid
3445
3446     """
3447     errs = []
3448     if self.op.command == constants.OOB_HEALTH:
3449       if not isinstance(result.payload, list):
3450         errs.append("command 'health' is expected to return a list but got %s" %
3451                     type(result.payload))
3452       else:
3453         for item, status in result.payload:
3454           if status not in constants.OOB_STATUSES:
3455             errs.append("health item '%s' has invalid status '%s'" %
3456                         (item, status))
3457
3458     if self.op.command == constants.OOB_POWER_STATUS:
3459       if not isinstance(result.payload, dict):
3460         errs.append("power-status is expected to return a dict but got %s" %
3461                     type(result.payload))
3462
3463     if self.op.command in [
3464         constants.OOB_POWER_ON,
3465         constants.OOB_POWER_OFF,
3466         constants.OOB_POWER_CYCLE,
3467         ]:
3468       if result.payload is not None:
3469         errs.append("%s is expected to not return payload but got '%s'" %
3470                     (self.op.command, result.payload))
3471
3472     if errs:
3473       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3474                                utils.CommaJoin(errs))
3475
class _OsQuery(_QueryBase):
  """Query implementation for operating systems.

  The data is gathered by diagnosing the OSes on all online, vm_capable
  nodes; no locks are used.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Set up the wanted names; no locks are requested.

    """
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Does nothing, as this query declares no locks.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    Nodes whose RPC call failed are left out completely; nodes which
    answered get an entry (possibly an empty list) for every OS found
    on any node.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of tuples of (path, status,
        diagnose, variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api",
                                      [], [], [])],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    An OS is considered valid only if every node that answered has it
    and the status of the first entry on each node is true. For valid
    OSes the variants, parameters and API versions are reduced to the
    values common to all nodes.

    @param lu: the logical unit on whose behalf we execute
    @rtype: list
    @return: L{query.OsInfo} objects, in the order given by
        C{_GetNames}

    """
    # Locking is not used
    assert not (lu.acquired_locks or self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        # osl[0][1] is the status of the first entry found on this node
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          # Whatever was collected so far is still stored below
          break

        # Elements 3 to 5 are variants, parameters and api_versions
        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
3588
3589
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  All of the work is handed to an L{_OsQuery} object.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names the query is restricted to
    @return: the name filter and the status filter joined with
        C{qlang.OP_AND} if both exist, otherwise whichever of the two
        exists, or None

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    return name_filter

  def CheckArguments(self):
    """Create the query object for the requested fields and names.

    """
    os_filter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(os_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object set up names and locks.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.oq.OldStyleQuery(self)
3632
3633
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the list of all nodes except the one being removed, used
        for both elements of the returned tuple

    """
    remaining = self.cfg.GetNodeList()
    try:
      remaining.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    all_instances = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    # The node must not be used by any instance, in whatever role
    for iname in all_instances:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    node_name = node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # Read before the node is taken out of the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    master_node = self.cfg.GetMasterNode()
    hosts_result = self.rpc.call_etc_hosts_modify(master_node,
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node_name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
3728
3729
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted nodes and, for live data, request node locks.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    # if we don't request only static fields, we need to lock the nodes
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Does nothing; all locks are requested in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data found in C{self.requested_data} are
    gathered; the other arguments are passed as None (or as an empty
    dict in the case of the node groups).

    @param lu: the logical unit on whose behalf we execute
    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
    wanted_data = self.requested_data

    # Live data
    live_data = None
    if query.NQ_LIVE in wanted_data:
      # filter out non-vm_capable nodes
      vm_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(vm_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = {}
      for (name, nresult) in node_data.items():
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = nresult.payload

    # Instances per node
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in wanted_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        pnode = inst.primary_node
        if pnode in node_to_primary:
          node_to_primary[pnode].add(inst.name)
        for snode in inst.secondary_nodes:
          if snode in node_to_secondary:
            node_to_secondary[snode].add(inst.name)

    # Out-of-band support, computed for every node in the configuration
    oob_support = None
    if query.NQ_OOB in wanted_data:
      oob_support = {}
      for name, node in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    # Node groups
    if query.NQ_GROUP in wanted_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
3804
3805
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All of the work is handed to a L{_NodeQuery} object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object for the requested names and fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Let the query object set up names and locks.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.nq.OldStyleQuery(self)
3822
3823
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently; nodes whose RPC call failed
    are skipped with a warning.

    @rtype: list
    @return: one list per volume, holding the values of the requested
        output fields converted to strings; the volumes of a node are
        sorted by their device
    @raise errors.ParameterError: for an unknown output field

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # Map (node, LV name) to the owning instance once, instead of
    # scanning all instances again for every volume; setdefault keeps
    # the first instance in configuration order, which is the one a
    # linear scan over the instance list would find
    lv_owner = {}
    for inst in ilist:
      for (lv_node, lv_names) in inst.MapLVsByNode().items():
        for lv_name in lv_names:
          lv_owner.setdefault((lv_node, lv_name), inst.name)

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      # Sort a copy, the RPC payload itself is left untouched
      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # Volumes not belonging to any instance are shown as "-"
            val = lv_owner.get((node, vol['name']), '-')
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
3900
3901
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Collects the requested fields of the storage units on the nodes.

    Offline nodes are skipped silently; nodes whose RPC call failed
    are skipped with a warning.

    @rtype: list
    @return: one list of field values per storage unit; nodes and the
        units of each node are processed in C{utils.NiceSort} order
    @raise errors.ParameterError: for an unknown output field

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    # Position of every field within the rows returned by the nodes
    field_idx = {}
    for (idx, fname) in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      # Index the rows by unit name so they can be returned sorted
      rows = {}
      for row in nresult.payload:
        rows[row[name_idx]] = row

      for unit_name in utils.NiceSort(rows.keys()):
        row = rows[unit_name]
        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
3983
3984
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted instances and, for live data, request locks.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # The node locks are filled in by DeclareLocks
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Lock the nodes of the locked instances, if locking is used.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data found in C{self.requested_data} are
    gathered.

    @param lu: the logical unit on whose behalf we execute
    @return: a L{query.InstanceQueryData} object

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst not in all_info:
              # The node reports an instance which is not part of the
              # configuration; it can't be shown in the output, and
              # must not make the whole query fail with a KeyError
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
              continue

            if all_info[inst].primary_node == name:
              # NOTE(review): this merges the node's complete payload,
              # not only this instance's entry; confirm this is intended
              live_data.update(result.payload)
            else:
              wrongnode_inst.add(inst)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
4072
4073
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind is given by C{self.op.what}; everything else is delegated
  to the query implementation found for it.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiate the query implementation for the requested kind.

    """
    implementation = _GetQueryImplementation(self.op.what)
    self.impl = implementation(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    """Let the query implementation set up names and locks.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query implementation declare the locks of a level.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its new-style result.

    """
    return self.impl.NewStyleQuery(self)
4094
4095
class LUQueryFields(NoHooksLU):
  """Logical unit querying the fields available for a kind of resource.

  The result is computed by C{query.QueryFields} from the C{FIELDS}
  attribute of the query implementation class selected by
  C{self.op.what}.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation class for the resource kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Requests no locks at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of C{query.QueryFields} for the requested fields.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
4111
4112
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no entry in
        C{constants.MODIFIABLE_STORAGE_FIELDS}, or if a requested field
        is not among the modifiable ones for that type

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    stype = self.op.storage_type

    try:
      allowed_fields = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)

    rejected = set(self.op.changes.keys()) - allowed_fields
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (stype, list(rejected)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the target node only.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def Exec(self, feedback_fn):
    """Asks the node to apply the changes to the storage unit.

    The RPC result's C{Raise} is called with a failure message naming
    the unit and the node.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    rpc_result = self.rpc.call_storage_modify(op.node_name,
                                              op.storage_type, st_args,
                                              op.name, op.changes)
    rpc_result.Raise("Failed to modify storage unit '%s' on %s" %
                     (op.name, op.node_name))
4153
4154
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  @cvar _NFLAGS: names of the capability flags that exist both on the
      opcode and on the node object

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolves the node name and rejects a node group on re-add.

    @raise errors.OpPrereqError: if both C{readd} and C{group} are set

    """
    ip_family = self.cfg.GetPrimaryIPFamily()
    self.primary_ip_family = ip_family
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    op = self.op
    env = {
      "OP_TARGET": op.node_name,
      "NODE_NAME": op.node_name,
      "NODE_PIP": op.primary_ip,
      "NODE_SIP": op.secondary_ip,
      "MASTER_CAPABLE": str(op.master_capable),
      "VM_CAPABLE": str(op.vm_capable),
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the node being
        added is only part of the second list

    """
    added = self.op.node_name
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([added]))
    post_nodes = pre_nodes + [added]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    op = self.op
    node = self.hostname.name
    primary_ip = op.primary_ip = self.hostname.ip

    # Without an explicit secondary IP the primary one is reused, which is
    # only possible if the primary address is not an IPv6 one
    if op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      op.secondary_ip = primary_ip

    secondary_ip = op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    self.changed_primary_ip = False

    new_ips = (primary_ip, secondary_ip)
    for other_name in node_list:
      other = cfg.GetNodeInfo(other_name)

      if op.readd and node == other_name:
        # The node's own old entry: the secondary IP must be unchanged,
        # a changed primary IP is only recorded
        if other.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if other.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if other.primary_ip in new_ips or other.secondary_ip in new_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this block, None is no longer a valid value for the
    # _capable op attributes
    if op.readd:
      old_node = cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
    for attr in self._NFLAGS:
      if getattr(op, attr) is not None:
        continue
      if op.readd:
        # re-added nodes inherit the flag from their old configuration
        setattr(op, attr, getattr(old_node, attr))
      else:
        setattr(op, attr, True)

    if op.readd and not op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no secondary ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a secondary ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      sec_reachable = netutils.TcpPing(secondary_ip,
                                       constants.DEFAULT_NODED_PORT,
                                       source=myself.secondary_ip)
      if not sec_reachable:
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    exceptions = []
    if op.readd:
      exceptions.append(node)

    if op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if op.readd:
      self.new_node = old_node
    else:
      group_ref = cfg.LookupNodeGroup(op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=group_ref)

    if op.ndparams:
      utils.ForceDictType(op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    op = self.op
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = op.primary_ip

    # copy the master/vm_capable flags
    for flag in self._NFLAGS:
      setattr(new_node, flag, getattr(op, flag))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    new_node.ndparams = op.ndparams or {}

    # check connectivity
    version_res = self.rpc.call_version([node])[node]
    version_res.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_res.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_res.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_res.payload)

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      hosts_res = self.rpc.call_etc_hosts_modify(master_node,
                                                 constants.ETC_HOSTS_ADD,
                                                 self.hostname.name,
                                                 self.hostname.ip)
      hosts_res.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_res = self.rpc.call_node_verify(node_verify_list,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    for verifier in node_verify_list:
      one_res = verify_res[verifier]
      one_res.Raise("Cannot communicate with node %s" % verifier)
      nl_payload = one_res.payload[constants.NV_NODELIST]
      if not nl_payload:
        continue
      # report every failure before aborting the operation
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      msg = self.rpc.call_node_demote_from_mc(new_node.name).fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
4422
4423
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validates the opcode and computes the locking strategy.

    Sets C{self.might_demote}, C{self.lock_all} and
    C{self.lock_instances} for use by the later steps.

    @raise errors.OpPrereqError: if no modification was requested, if
        more than one of the checked parameters is True, or if the
        secondary IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only when a demotion may have to be compensated
    # by promoting another node
    self.lock_all = self.op.auto_promote and self.might_demote
    # Changing the secondary IP requires looking at the instances
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declares the node locks and, if needed, all instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Narrows the instance locks down to the affected instances.

    Fills C{self.affected_instances} with the instance objects that use
    an internally mirrored disk template and have this node among their
    nodes; the locks of all other instances are released.

    """
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      # NOTE(review): when all node locks are requested, nothing is
      # released and affected_instances stays empty, so the per-instance
      # checks in CheckPrereq do not run -- confirm this is intended
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment exposes the target node and the requested flag
    values (as strings).

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node and the modified node, for both the pre
        and the post phase

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the requested flag changes are allowed for the
    node, computes the old and the new node role (C{self.old_role},
    C{self.new_role}) and validates a secondary IP or node parameter
    change, if one was requested.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # Report the instance names; interpolating the list itself would
          # print the representation of the instance objects
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" %
                                     ", ".join([inst.name for inst in
                                                self.affected_instances]),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: list of (parameter name, new value) pairs describing the
        capability flag, role flag and secondary IP changes applied

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
4719
4720
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the C{force} parameter is not set

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target
    targets_master = (target == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC result

    """
    hv_type = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
4751
4752
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests no locks at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: dictionary with version constants, the architecture of the
        machine running this code and values taken from the cluster
        configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, restricted to the enabled hypervisors
    os_hvp = {}
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for (hv_name, hv_params) in hv_dict.items()
                             if hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
4820
4821
class LUClusterConfigQuery(NoHooksLU):
  """Answer queries for a fixed set of cluster configuration values.

  The fields that can be requested are the ones in C{_FIELDS_STATIC};
  the set of dynamic fields is empty.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Check the requested output fields against the declared field sets.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request no locks at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Collect the value of each requested field.

    @return: list with one entry per name in C{self.op.output_fields},
        in the same order
    @raise errors.ParameterError: for a field name not handled here

    """
    def _ValueOf(name):
      """Compute the value of a single field.

      """
      if name == "cluster_name":
        return self.cfg.GetClusterName()
      if name == "master_node":
        return self.cfg.GetMasterNode()
      if name == "drain_flag":
        # the flag is reported as the existence of the drain file
        return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      if name == "watcher_pause":
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      if name == "volume_group_name":
        return self.cfg.GetVGName()
      raise errors.ParameterError(name)

    return [_ValueOf(name) for name in self.op.output_fields]
4859
4860
class LUInstanceActivateDisks(NoHooksLU):
  """Assemble (activate) the block devices of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and ask for its node locks to be recalculated.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance in the configuration and verifies that its
    primary node is online.

    """
    name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (assembled, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if assembled:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
4898
4899
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Assemble the block devices of an instance on its nodes.

  The work is done in two passes: first every device of every selected
  disk is assembled in secondary mode on all nodes, then the devices
  living on the primary node are assembled again in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are assembled
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to assemble (all instance disks if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, failures in the first pass are
      only logged and do not influence the returned status
  @type ignore_size: boolean
  @param ignore_size: if true, each device is assembled from a copy
      whose size has been unset, useful when the recorded size is wrong
  @rtype: tuple
  @return: (disks_ok, device_info); disks_ok is False if any
      non-ignored failure happened, and device_info is a list of
      (primary_node, instance_visible_name, device_path) tuples with
      one entry per selected disk, where device_path is the payload of
      the second-pass call or None if that call failed

  """
  iname = instance.name
  pnode = instance.primary_node
  selected = _ExpandCheckDisks(instance, disks)
  success = True
  mapping = []

  def _AssembleOn(node, node_disk, as_primary, idx):
    """Prepare one device for a node and run the assemble RPC.

    """
    if ignore_size:
      # work on a copy so the configuration object keeps its size
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return lu.rpc.call_blockdev_assemble(node, node_disk, iname,
                                         as_primary, idx)

  # The two passes narrow the window for the race condition of
  # switching DRBD to primary before the handshake occurred, but they
  # do not close it.

  # A proper fix would wait (with some limits) until the connection
  # has been made and drbd moves from WFConnection into any other
  # network-connected state (Connected, SyncTarget, SyncSource, etc.)

  # first pass: everything in secondary mode, on all nodes
  for idx, inst_disk in enumerate(selected):
    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      err = _AssembleOn(node, node_disk, False, idx).fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, err)
      if not ignore_secondaries:
        success = False

  # FIXME: race condition on drbd migration to primary

  # second pass: primary mode, primary node only
  for idx, inst_disk in enumerate(selected):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      if node != pnode:
        continue
      outcome = _AssembleOn(node, node_disk, True, idx)
      err = outcome.fail_msg
      if err:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, err)
        success = False
      else:
        dev_path = outcome.payload

    mapping.append((pnode, inst_disk.iv_name, dev_path))

  # Leave the disks configured for the primary node; this is a
  # workaround that would be better fixed by improving the
  # logical/physical id handling.
  for disk in selected:
    lu.cfg.SetDiskID(disk, pnode)

  return success, mapping
4986
4987
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up on failure.

  If the assembly fails, the disks are shut down again and an error is
  raised.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are started
  @param force: passed to L{_AssembleInstanceDisks} as
      C{ignore_secondaries}; when it is false but not None, a hint
      about retrying with '--force' is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # None means the caller has no '--force' to offer, so no hint is given
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
5001
5002
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shut down the block devices of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and ask for its node locks to be recalculated.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance in the configuration.

    """
    name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    Without the force flag the shutdown goes through
    L{_SafeShutdownInstanceDisks}; with it, the disks are shut down
    directly.

    """
    if not self.op.force:
      _SafeShutdownInstanceDisks(self, self.instance)
    else:
      _ShutdownInstanceDisks(self, self.instance)
5037
5038
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shut down instance disks after the instance-down check.

  L{_CheckInstanceDown} is called first; L{_ShutdownInstanceDisks} is
  only reached if that check returns normally. The result of the
  shutdown is not passed back to the caller.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to shut down, handed on unchanged

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
5048
5049
def _ExpandCheckDisks(instance, disks):
  """Resolve a disk selection against the disks of an instance.

  @param instance: object whose C{disks} attribute lists its disks
  @type disks: list of L{objects.Disk} or None
  @param disks: the selected disks, or None to select all of them
  @rtype: list of L{objects.Disk}
  @return: C{instance.disks} if no selection was given, otherwise the
      selection itself
  @raise errors.ProgrammerError: if the selection contains a disk that
      is not among the instance's disks

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
5066
5067
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shut down the block devices of an instance on all its nodes.

  Every failed shutdown call is logged as a warning. A failure turns
  the overall result to False, except in two cases: a failure on the
  primary node while C{ignore_primary} is true, and a failure on a
  non-primary node whose RPC result is flagged as offline.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to shut down (all instance disks if None)
  @type ignore_primary: boolean
  @param ignore_primary: if true, failures on the primary node do not
      influence the result
  @rtype: boolean
  @return: False if any non-ignored failure happened, True otherwise

  """
  success = True
  selected = _ExpandCheckDisks(instance, disks)

  for disk in selected:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      err = lu.rpc.call_blockdev_shutdown(node, top_disk).fail_msg \
        if False else None
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      err = result.fail_msg
      if not err:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, err)
      if node == instance.primary_node:
        counts = not ignore_primary
      else:
        # failures on offline secondaries are tolerated
        counts = not result.offline
      if counts:
        success = False

  return success
5092
5093
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The node is queried via RPC; the check fails if the query fails, if
  the reported 'memory_free' value is not an integer, or if it is
  smaller than the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: the logical unit providing RPC access
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: text inserted into the "not enough memory" message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory,
      or the node cannot be checked

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    text = ("Can't compute free memory on node %s, result"
            " was '%s'" % (node, free_mem))
    raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

  if requested > free_mem:
    text = ("Not enough memory on node %s for %s:"
            " needed %s MiB, available %s MiB" %
            (node, reason, requested, free_mem))
    raise errors.OpPrereqError(text, errors.ECODE_NORES)
5129
5130
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Check free disk space on the given nodes, one volume group at a time.

  For every entry of C{req_sizes} the per-VG check
  L{_CheckNodesFreeDiskOnVG} is run against all the given nodes; any
  error it raises is propagated unchanged.

  @type lu: C{LogicalUnit}
  @param lu: the logical unit providing RPC access
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: maps each volume group name to the amount of disk
      in MiB to check for
  @raise errors.OpPrereqError: if a node doesn't have enough disk, or
      a node cannot be checked

  """
  for (vg_name, needed) in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, needed)
5152
5153
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify that all nodes have enough free space in one volume group.

  All nodes are queried with a single RPC call. The check fails for the
  first node whose query failed, whose reported 'vg_free' value is not
  an integer, or whose free space is below the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: the logical unit providing RPC access
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if a node doesn't have enough disk, or
      a node cannot be checked

  """
  results = lu.rpc.call_node_info(nodenames, vg, None)

  for node in nodenames:
    node_result = results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    vg_free = node_result.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      text = ("Can't compute free disk space on node"
              " %s for vg %s, result was '%s'" %
              (node, vg, vg_free))
      raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

    if requested > vg_free:
      text = ("Not enough disk space on target node %s"
              " vg %s: required %d MiB, available %d MiB" %
              (node, vg, requested, vg_free))
      raise errors.OpPrereqError(text, errors.ECODE_NORES)
5189
5190
class LUInstanceStartup(LogicalUnit):
  """Start an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Enforce the types of the extra backend parameters, if any.

    """
    if not self.op.beparams:
      return
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the force flag plus the standard
    per-instance variables.

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance,
        used for both elements of the returned pair

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, validates overridden hypervisor parameters
    and, unless an offline primary node is being ignored, checks the
    primary node, the instance's bridges and (if the instance is not
    reported as running) the free memory on the primary node.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if self.op.hvparams:
      # the overrides are validated on top of the filled instance values
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster.FillHV(instance)
      merged_hvp.update(self.op.hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor,
                     merged_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)

    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # already running, no memory check needed
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first. If the
    primary node is offline nothing else is done; otherwise the disks
    are started and the instance start RPC is issued, with the disks
    being shut down again if that call fails.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.op.hvparams,
                                                self.op.beparams)
    err = start_result.fail_msg
    if err:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % err)
5298
5299
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the reboot options plus the standard
    per-instance variables.

    """
    hook_env = {}
    hook_env["IGNORE_SECONDARIES"] = self.op.ignore_secondaries
    hook_env["REBOOT_TYPE"] = self.op.reboot_type
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance,
        used for both elements of the returned pair

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, then checks that its primary node is online
    and that its bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted via
    the reboot RPC. In every other case the instance is shut down (if
    it is running), its disks are restarted and it is started again.
    Finally the instance is marked as up in the configuration.

    @raise errors.OpExecError: if the instance could not be started
        again during a full reboot

    """
    instance = self.instance
    pnode = instance.primary_node
    reboot_type = self.op.reboot_type
    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    running = bool(remote_info.payload)

    if running and reboot_type in in_place_types:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, instance,
                                                    reboot_type,
                                                    self.op.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      if not running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      else:
        stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                      self.op.shutdown_timeout)
        stop_result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)

      _StartInstanceDisks(self, instance, self.op.ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode, instance, None, None)
      err = start_result.fail_msg
      if err:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % err)

    self.cfg.MarkInstanceUp(instance.name)
5391
5392
class LUInstanceShutdown(LogicalUnit):
  """Shut down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the standard per-instance variables plus the
    shutdown timeout.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance,
        used for both elements of the returned pair

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance and records whether its primary node is
    offline; an offline primary node is only accepted if the opcode
    asks for offline nodes to be ignored.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode_info = self.cfg.GetNodeInfo(self.instance.primary_node)
    self.primary_offline = pnode_info.offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, self.instance.primary_node)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first. If the
    primary node is offline nothing else is done; otherwise the
    shutdown RPC is issued (a failure is only logged) and the disks
    are shut down.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    err = self.rpc.call_instance_shutdown(pnode, instance, timeout).fail_msg
    if err:
      self.proc.LogWarning("Could not shutdown instance: %s" % err)

    _ShutdownInstanceDisks(self, instance)
5459
5460
class LUInstanceReinstall(LogicalUnit):
  """Reinstall the operating system of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the standard per-instance variables are exported.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance,
        used for both elements of the returned pair

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that all nodes of the instance are online, that the
    instance has disks and is down, and validates the new OS and the
    OS parameters if any were given.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is None:
      # keep the currently configured OS
      instance_os = instance.os
    else:
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      updated = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, updated)
      # the new dict, without defaults filled in
      self.os_inst = updated

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was requested it is written to the configuration
    first. The disks are then started, the OS creation RPC is run on
    the primary node, and the disks are shut down again whether or not
    the installation succeeded.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      # persist the change before running the scripts
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      add_result = self.rpc.call_instance_os_add(inst.primary_node, inst,
                                                 True, self.op.debug_level,
                                                 osparams=self.os_inst)
      add_result.Raise("Could not install OS for instance %s on node %s" %
                       (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
5550
5551
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the standard per-instance variables are exported.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by all nodes of the instance,
        used for both elements of the returned pair

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the primary node is online, that the instance has
    disks and is down, and that every requested disk index refers to
    an existing disk. An empty selection is expanded to all disks.

    @raise errors.OpPrereqError: if the instance is diskless or a disk
        index is negative or past the last disk

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    disk_count = len(instance.disks)
    if not self.op.disks:
      self.op.disks = range(disk_count)
    else:
      for idx in self.op.disks:
        # A negative index never matches a disk position in Exec, so
        # it would silently skip every disk; reject it like any other
        # out-of-range index.
        if idx < 0 or idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk whose index was not selected is passed to
    L{_CreateDisks} as one to skip.

    """
    to_skip = [idx for idx in range(len(self.instance.disks))
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
5615
5616
5617 class LUInstanceRename(LogicalUnit):
5618   """Rename an instance.
5619
5620   """
5621   HPATH = "instance-rename"
5622   HTYPE = constants.HTYPE_INSTANCE
5623
5624   def CheckArguments(self):
5625     """Check arguments.
5626
5627     """
5628     if self.op.ip_check and not self.op.name_check:
5629       # TODO: make the ip check more flexible and not depend on the name check
5630       raise errors.OpPrereqError("Cannot do ip check without a name check",
5631                                  errors.ECODE_INVAL)
5632
5633   def BuildHooksEnv(self):
5634     """Build hooks env.
5635
5636     This runs on master, primary and secondary nodes of the instance.
5637
5638     """
5639     env = _BuildInstanceHookEnvByObject(self, self.instance)
5640     env["INSTANCE_NEW_NAME"] = self.op.new_name
5641     return env
5642
5643   def BuildHooksNodes(self):
5644     """Build hooks nodes.
5645
5646     """
5647     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5648     return (nl, nl)
5649
5650   def CheckPrereq(self):
5651     """Check prerequisites.
5652
5653     This checks that the instance is in the cluster and is not running.
5654
5655     """
5656     self.op.instance_name = _ExpandInstanceName(self.cfg,
5657                                                 self.op.instance_name)
5658     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5659     assert instance is not None
5660     _CheckNodeOnline(self, instance.primary_node)
5661     _CheckInstanceDown(self, instance, "cannot rename")
5662     self.instance = instance
5663
5664     new_name = self.op.new_name
5665     if self.op.name_check:
5666       hostname = netutils.GetHostname(name=new_name)
5667       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5668                    hostname.name)
5669       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5670         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5671                                     " same as given hostname '%s'") %
5672                                     (hostname.name, self.op.new_name),
5673                                     errors.ECODE_INVAL)
5674       new_name = self.op.new_name = hostname.name
5675       if (self.op.ip_check and
5676           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5677         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5678                                    (hostname.ip, new_name),
5679                                    errors.ECODE_NOTUNIQUE)
5680
5681     instance_list = self.cfg.GetInstanceList()
5682     if new_name in instance_list and new_name != instance.name:
5683       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5684                                  new_name, errors.ECODE_EXISTS)
5685
5686   def Exec(self, feedback_fn):
5687     """Rename the instance.
5688
5689     """
5690     inst = self.instance
5691     old_name = inst.name
5692
5693     rename_file_storage = False
5694     if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5695         self.op.new_name != inst.name):
5696       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5697       rename_file_storage = True
5698
5699     self.cfg.RenameInstance(inst.name, self.op.new_name)
5700     # Change the instance lock. This is definitely safe while we hold the BGL
5701     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5702     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5703
5704     # re-read the instance from the configuration after rename
5705     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5706
5707     if rename_file_storage:
5708       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5709       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5710                                                      old_file_storage_dir,
5711                                                      new_file_storage_dir)
5712       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5713                    " (but the instance has been renamed in Ganeti)" %
5714                    (inst.primary_node, old_file_storage_dir,
5715                     new_file_storage_dir))
5716
5717     _StartInstanceDisks(self, inst, None)
5718     try:
5719       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5720                                                  old_name, self.op.debug_level)
5721       msg = result.fail_msg
5722       if msg:
5723         msg = ("Could not run OS rename script for instance %s on node %s"
5724                " (but the instance has been renamed in Ganeti): %s" %
5725                (inst.name, inst.primary_node, msg))
5726         self.proc.LogWarning(msg)
5727     finally:
5728       _ShutdownInstanceDisks(self, inst)
5729
5730     return inst.name
5731
5732
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  The instance is shut down on its primary node and then removed via
  L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node locks for recalculation.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment of the instance is extended with the requested
    shutdown timeout.

    @return: the hooks environment

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env.update({"SHUTDOWN_TIMEOUT": self.op.shutdown_timeout})
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair of node lists; the first contains only the master
        node, the second all nodes of the instance followed by the master

    """
    master_only = [self.cfg.GetMasterNode()]
    instance_and_master = list(self.instance.all_nodes) + master_only
    return (master_only, instance_and_master)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    @param feedback_fn: function used to report a failed shutdown when
        failures are being ignored
    @raise errors.OpExecError: if the shutdown fails and failures are not
        ignored

    """
    instance = self.instance
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, pnode)

    shutdown = self.rpc.call_instance_shutdown(pnode, instance,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, pnode, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5798
5799
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the disks of the instance, drops the instance from the
  configuration and schedules its lock for removal.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report a failed disk removal when
      failures are being ignored
  @param instance: the instance to remove
  @param ignore_failures: whether a failed disk removal should only be
      reported instead of aborting the operation
  @raise errors.OpExecError: if the disks cannot be removed and failures
      are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  level = locking.LEVEL_INSTANCE
  assert not lu.remove_locks.get(level), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[level] = instance.name
5820
5821
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object created in
  L{CheckArguments}.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    @param level: the locking level being processed

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @param feedback_fn: feedback function (not used here)
    @return: the result of the query object's old-style query

    """
    return self.iq.OldStyleQuery(self)
5841
5842
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The instance is shut down on its current primary node, the
  configuration is updated to point to the target node and, if the
  instance is marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the optional C{iallocator} and C{target_node} opcode fields to
    the LU, using C{None} for fields missing from the opcode.

    """
    for field in ("iallocator", "target_node"):
      setattr(self, field, getattr(self.op, field, None))

  def ExpandNames(self):
    """Lock the instance and expand the target node name, if given.

    """
    self._ExpandAndLockInstance()

    requested_node = self.op.target_node
    if requested_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, requested_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and target nodes are locked; for all
    other templates the nodes of the instance are locked.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      wanted_nodes = locking.ALL_SET
    else:
      wanted_nodes = [instance.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    # The node list is final, no recalculation is needed
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    Describes the old and new primary/secondary nodes; the secondary
    entries are empty strings unless the disk template is internally
    mirrored. The instance environment is merged in last.

    @return: the hooks environment

    """
    instance = self.instance
    source_node = instance.primary_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = instance.secondary_nodes[0]
      new_secondary = source_node
    else:
      old_secondary = new_secondary = ""

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": self.op.target_node,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair of node lists; the first contains the master and the
        secondary nodes, the second additionally the primary node

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(self.instance.secondary_nodes)
    post_nodes = pre_nodes + [self.instance.primary_node]
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and uses a mirrored
    disk template, determines the target node and verifies that it is
    online, not drained, has enough memory (only if the instance is
    marked up) and has the bridges needed by the instance.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self, "iallocator", "target_node")
      if self.op.iallocator:
        self._RunAllocator()
        # Release all unnecessary node locks
        kept = [instance.primary_node, self.op.target_node]
        released = [name
                    for name in self.acquired_locks[locking.LEVEL_NODE]
                    if name not in kept]
        self.context.glm.release(locking.LEVEL_NODE, released)
        self.acquired_locks[locking.LEVEL_NODE] = kept

      # self.op.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.op.target_node

    else:
      secondaries = instance.secondary_nodes
      if not secondaries:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondaries[0]

      # Only the secondary node is an acceptable target here
      other_node_wanted = (self.op.target_node and
                           self.op.target_node != target_node)
      if self.op.iallocator or other_node_wanted:
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be failed over to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   instance.disk_template, errors.ECODE_INVAL)
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    # Save target_node so that we can use it in BuildHooksEnv
    self.op.target_node = target_node

    if not instance.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, be_params[constants.BE_MEMORY],
                           instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: function used to report progress
    @raise errors.OpExecError: if a disk is degraded on the target node,
        the shutdown fails (unless consistency is ignored or the primary
        node is offline), the disks cannot be shut down or activated, or
        the instance cannot be started

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    source_node_info = self.cfg.GetNodeInfo(source_node)

    if not instance.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, dev, target_node, False)
        if not (consistent or self.op.ignore_consistency):
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    shutdown = self.rpc.call_instance_shutdown(source_node, instance,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not (self.op.ignore_consistency or source_node_info.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, failure))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, source_node, source_node, failure)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if not instance.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 instance.name, target_node)

    disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start = self.rpc.call_instance_start(target_node, instance, None, None)
    failure = start.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, target_node, failure))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success the first node of the allocator result is stored in
    C{self.op.target_node}.

    @raise errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    source_node = self.instance.primary_node
    allocator = self.op.iallocator
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance.name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[source_node, source_node],
                     )

    ial.Run(allocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (allocator, ial.info),
                                 errors.ECODE_NORES)

    returned_count = len(ial.result)
    if returned_count != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (allocator, returned_count,
                                  ial.required_nodes), errors.ECODE_FAULT)

    self.op.target_node = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance.name, allocator,
                 utils.CommaJoin(ial.result))
6074
6075
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is delegated to a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance, expand the target node and create the tasklet.

    """
    self._ExpandAndLockInstance()

    requested_node = self.op.target_node
    if requested_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, requested_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.cleanup, self.op.iallocator,
                                 self.op.target_node)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and target nodes are locked; for all
    other templates the nodes of the instance are locked.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      wanted_nodes = locking.ALL_SET
    else:
      wanted_nodes = [instance.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    # The node list is final, no recalculation is needed
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    The instance environment is extended with the migration mode and the
    old/new primary and secondary nodes, all taken from the tasklet; the
    secondary entries are C{None} unless the disk template is internally
    mirrored.

    @return: the hooks environment

    """
    migrater = self._migrater
    instance = migrater.instance
    source_node = instance.primary_node
    target_node = migrater.target_node

    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env["OLD_PRIMARY"] = source_node
    env["NEW_PRIMARY"] = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # primary and secondary swap their roles
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = None
      env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair of node lists; the first contains the master and the
        secondary nodes, the second additionally the primary node

    """
    instance = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(instance.secondary_nodes)
    post_nodes = pre_nodes + [instance.primary_node]
    return (pre_nodes, post_nodes)
6146
6147
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down, new disks are created on the target node,
  the data is copied over, the configuration is switched to the target
  node, the old disks are removed and, if the instance is marked as up,
  it is started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the primary node of the instance to the node locks.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node (see L{BuildHooksNodes}).

    @return: the hooks environment

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair with the same list (master, primary node, target
        node) for both elements

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are plain logical
    volumes or files, and that the target node is online, not drained,
    VM-capable, has enough free memory (only if the instance is marked
    up) and has the bridges needed by the instance.

    @raise errors.OpPrereqError: if the instance is already on the target
        node or one of its disks cannot be copied

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node; the reason text
      # describes this operation (a move, not a failover)
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: feedback function, passed on to the configuration
        update
    @raise errors.OpExecError: if the shutdown fails (unless consistency
        is ignored), the target disks cannot be created, the data copy
        fails, or the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
      # the cleanup went through, re-raise the original creation error
      raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        # Raising from the finally clause reports the copy errors even if
        # removing the new disks raised an exception itself
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6330
6331
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance which
  has the node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the iallocator/remote node opcode parameters.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {}

    # Create tasklets for migrating instances for all instances on this node
    instance_names = []
    migraters = []

    self.lock_all_nodes = False

    for inst in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", inst.name)
      instance_names.append(inst.name)

      migraters.append(TLMigrateInstance(self, inst.name, False,
                                         self.op.iallocator, None))

      if inst.disk_template in constants.DTS_EXT_MIRROR:
        # We need to lock all nodes, as the iallocator will choose the
        # destination nodes afterwards
        self.lock_all_nodes = True

    self.tasklets = migraters

    # Declare node locks
    if not self.lock_all_nodes:
      self.needed_locks[locking.LEVEL_NODE] = [node_name]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Add the nodes of the instances unless all nodes are locked anyway.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE or self.lock_all_nodes:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment contains only the name of the node; the hooks run on
    the nodes returned by L{BuildHooksNodes}.

    @return: the hooks environment

    """
    hooks_env = {}
    hooks_env["NODE_NAME"] = self.op.node_name
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair with the same single-element list (the master node)
        for both elements

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)
6398
6399
6400 class TLMigrateInstance(Tasklet):
6401   """Tasklet class for instance migration.
6402
6403   @type live: boolean
6404   @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
6406
6407   """
6408   def __init__(self, lu, instance_name, cleanup,
6409                iallocator=None, target_node=None):
6410     """Initializes this class.
6411
6412     """
6413     Tasklet.__init__(self, lu)
6414
6415     # Parameters
6416     self.instance_name = instance_name
6417     self.cleanup = cleanup
6418     self.live = False # will be overridden later
6419     self.iallocator = iallocator
6420     self.target_node = target_node
6421
6422   def CheckPrereq(self):
6423     """Check prerequisites.
6424
6425     This checks that the instance is in the cluster.
6426
6427     """
6428     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6429     instance = self.cfg.GetInstanceInfo(instance_name)
6430     assert instance is not None
6431     self.instance = instance
6432
6433     if instance.disk_template not in constants.DTS_MIRRORED:
6434       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6435                                  " migrations" % instance.disk_template,
6436                                  errors.ECODE_STATE)
6437
6438     if instance.disk_template in constants.DTS_EXT_MIRROR:
6439       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6440
6441       if self.iallocator:
6442         self._RunAllocator()
6443
6444       # self.target_node is already populated, either directly or by the
6445       # iallocator run
6446       target_node = self.target_node
6447
6448       if len(self.lu.tasklets) == 1:
6449         # It is safe to remove locks only when we're the only tasklet in the LU
6450         nodes_keep = [instance.primary_node, self.target_node]
6451         nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6452                      if node not in nodes_keep]
6453         self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6454         self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6455
6456     else:
6457       secondary_nodes = instance.secondary_nodes
6458       if not secondary_nodes:
6459         raise errors.ConfigurationError("No secondary node but using"
6460                                         " %s disk template" %
6461                                         instance.disk_template)
6462       target_node = secondary_nodes[0]
6463       if self.lu.op.iallocator or (self.lu.op.target_node and
6464                                    self.lu.op.target_node != target_node):
6465         raise errors.OpPrereqError("Instances with disk template %s cannot"
6466                                    " be migrated over to arbitrary nodes"
6467                                    " (neither an iallocator nor a target"
6468                                    " node can be passed)" %
6469                                    instance.disk_template, errors.ECODE_INVAL)
6470
6471     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6472
6473     # check memory requirements on the secondary node
6474     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6475                          instance.name, i_be[constants.BE_MEMORY],
6476                          instance.hypervisor)
6477
6478     # check bridge existance
6479     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6480
6481     if not self.cleanup:
6482       _CheckNodeNotDrained(self.lu, target_node)
6483       result = self.rpc.call_instance_migratable(instance.primary_node,
6484                                                  instance)
6485       result.Raise("Can't migrate, please use failover",
6486                    prereq=True, ecode=errors.ECODE_STATE)
6487
6488
6489   def _RunAllocator(self):
6490     """Run the allocator based on input opcode.
6491
6492     """
6493     ial = IAllocator(self.cfg, self.rpc,
6494                      mode=constants.IALLOCATOR_MODE_RELOC,
6495                      name=self.instance_name,
6496                      # TODO See why hail breaks with a single node below
6497                      relocate_from=[self.instance.primary_node,
6498                                     self.instance.primary_node],
6499                      )
6500
6501     ial.Run(self.iallocator)
6502
6503     if not ial.success:
6504       raise errors.OpPrereqError("Can't compute nodes using"
6505                                  " iallocator '%s': %s" %
6506                                  (self.iallocator, ial.info),
6507                                  errors.ECODE_NORES)
6508     if len(ial.result) != ial.required_nodes:
6509       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6510                                  " of nodes (%s), required %s" %
6511                                  (self.iallocator, len(ial.result),
6512                                   ial.required_nodes), errors.ECODE_FAULT)
6513     self.target_node = ial.result[0]
6514     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6515                  self.instance_name, self.iallocator,
6516                  utils.CommaJoin(ial.result))
6517
6518     if self.lu.op.live is not None and self.lu.op.mode is not None:
6519       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6520                                  " parameters are accepted",
6521                                  errors.ECODE_INVAL)
6522     if self.lu.op.live is not None:
6523       if self.lu.op.live:
6524         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6525       else:
6526         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6527       # reset the 'live' parameter to None so that repeated
6528       # invocations of CheckPrereq do not raise an exception
6529       self.lu.op.live = None
6530     elif self.lu.op.mode is None:
6531       # read the default value from the hypervisor
6532       i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
6533       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6534
6535     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6536
6537   def _WaitUntilSync(self):
6538     """Poll with custom rpc for disk sync.
6539
6540     This uses our own step-based rpc call.
6541
6542     """
6543     self.feedback_fn("* wait until resync is done")
6544     all_done = False
6545     while not all_done:
6546       all_done = True
6547       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6548                                             self.nodes_ip,
6549                                             self.instance.disks)
6550       min_percent = 100
6551       for node, nres in result.items():
6552         nres.Raise("Cannot resync disks on node %s" % node)
6553         node_done, node_percent = nres.payload
6554         all_done = all_done and node_done
6555         if node_percent is not None:
6556           min_percent = min(min_percent, node_percent)
6557       if not all_done:
6558         if min_percent < 100:
6559           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6560         time.sleep(2)
6561
6562   def _EnsureSecondary(self, node):
6563     """Demote a node to secondary.
6564
6565     """
6566     self.feedback_fn("* switching node %s to secondary mode" % node)
6567
6568     for dev in self.instance.disks:
6569       self.cfg.SetDiskID(dev, node)
6570
6571     result = self.rpc.call_blockdev_close(node, self.instance.name,
6572                                           self.instance.disks)
6573     result.Raise("Cannot change disk to secondary on node %s" % node)
6574
6575   def _GoStandalone(self):
6576     """Disconnect from the network.
6577
6578     """
6579     self.feedback_fn("* changing into standalone mode")
6580     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6581                                                self.instance.disks)
6582     for node, nres in result.items():
6583       nres.Raise("Cannot disconnect disks node %s" % node)
6584
6585   def _GoReconnect(self, multimaster):
6586     """Reconnect to the network.
6587
6588     """
6589     if multimaster:
6590       msg = "dual-master"
6591     else:
6592       msg = "single-master"
6593     self.feedback_fn("* changing disks into %s mode" % msg)
6594     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6595                                            self.instance.disks,
6596                                            self.instance.name, multimaster)
6597     for node, nres in result.items():
6598       nres.Raise("Cannot change disks config on node %s" % node)
6599
6600   def _ExecCleanup(self):
6601     """Try to cleanup after a failed migration.
6602
6603     The cleanup is done by:
6604       - check that the instance is running only on one node
6605         (and update the config if needed)
6606       - change disks on its secondary node to secondary
6607       - wait until disks are fully synchronized
6608       - disconnect from the network
6609       - change disks into single-master mode
6610       - wait again until disks are fully synchronized
6611
6612     """
6613     instance = self.instance
6614     target_node = self.target_node
6615     source_node = self.source_node
6616
6617     # check running on only one node
6618     self.feedback_fn("* checking where the instance actually runs"
6619                      " (if this hangs, the hypervisor might be in"
6620                      " a bad state)")
6621     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6622     for node, result in ins_l.items():
6623       result.Raise("Can't contact node %s" % node)
6624
6625     runningon_source = instance.name in ins_l[source_node].payload
6626     runningon_target = instance.name in ins_l[target_node].payload
6627
6628     if runningon_source and runningon_target:
6629       raise errors.OpExecError("Instance seems to be running on two nodes,"
6630                                " or the hypervisor is confused. You will have"
6631                                " to ensure manually that it runs only on one"
6632                                " and restart this operation.")
6633
6634     if not (runningon_source or runningon_target):
6635       raise errors.OpExecError("Instance does not seem to be running at all."
6636                                " In this case, it's safer to repair by"
6637                                " running 'gnt-instance stop' to ensure disk"
6638                                " shutdown, and then restarting it.")
6639
6640     if runningon_target:
6641       # the migration has actually succeeded, we need to update the config
6642       self.feedback_fn("* instance running on secondary node (%s),"
6643                        " updating config" % target_node)
6644       instance.primary_node = target_node
6645       self.cfg.Update(instance, self.feedback_fn)
6646       demoted_node = source_node
6647     else:
6648       self.feedback_fn("* instance confirmed to be running on its"
6649                        " primary node (%s)" % source_node)
6650       demoted_node = target_node
6651
6652     if instance.disk_template in constants.DTS_INT_MIRROR:
6653       self._EnsureSecondary(demoted_node)
6654       try:
6655         self._WaitUntilSync()
6656       except errors.OpExecError:
6657         # we ignore here errors, since if the device is standalone, it
6658         # won't be able to sync
6659         pass
6660       self._GoStandalone()
6661       self._GoReconnect(False)
6662       self._WaitUntilSync()
6663
6664     self.feedback_fn("* done")
6665
6666   def _RevertDiskStatus(self):
6667     """Try to revert the disk status after a failed migration.
6668
6669     """
6670     target_node = self.target_node
6671     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6672       return
6673
6674     try:
6675       self._EnsureSecondary(target_node)
6676       self._GoStandalone()
6677       self._GoReconnect(False)
6678       self._WaitUntilSync()
6679     except errors.OpExecError, err:
6680       self.lu.LogWarning("Migration failed and I can't reconnect the"
6681                          " drives: error '%s'\n"
6682                          "Please look and recover the instance status" %
6683                          str(err))
6684
6685   def _AbortMigration(self):
6686     """Call the hypervisor code to abort a started migration.
6687
6688     """
6689     instance = self.instance
6690     target_node = self.target_node
6691     migration_info = self.migration_info
6692
6693     abort_result = self.rpc.call_finalize_migration(target_node,
6694                                                     instance,
6695                                                     migration_info,
6696                                                     False)
6697     abort_msg = abort_result.fail_msg
6698     if abort_msg:
6699       logging.error("Aborting migration failed on target node %s: %s",
6700                     target_node, abort_msg)
6701       # Don't raise an exception here, as we stil have to try to revert the
6702       # disk status, even if this step failed.
6703
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - check that all disks are consistent on the target node
      - fetch the migration information from the source node
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - prepare the target node to accept the instance
      - migrate the instance
      - update the configuration and finalize the migration on the target
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are skipped for disk templates listed in
    C{constants.DTS_EXT_MIRROR}.

    @raise errors.OpExecError: if a disk is degraded on the target node,
        or if fetching the migration information, the pre-migration
        step, the migration itself or its finalization fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # kept on self as well, because _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      # roll back: abort on the target, then restore the disk status
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): fixed 10 second pause before the migration RPC; the
    # reason is not visible in this code -- confirm it is still needed
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      # same rollback as for a failed pre-migration step
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    # NOTE(review): second fixed 10 second pause, after a successful
    # migration RPC; purpose likewise not visible here
    time.sleep(10)

    # from here on the target node is recorded as the primary node
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      # no rollback here: the configuration was already updated above
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # demote the old primary and go back to single-master mode
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
6800
6801   def Exec(self, feedback_fn):
6802     """Perform the migration.
6803
6804     """
6805     feedback_fn("Migrating instance %s" % self.instance.name)
6806
6807     self.feedback_fn = feedback_fn
6808
6809     self.source_node = self.instance.primary_node
6810
6811     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6812     if self.instance.disk_template in constants.DTS_INT_MIRROR:
6813       self.target_node = self.instance.secondary_nodes[0]
6814       # Otherwise self.target_node has been populated either
6815       # directly, or through an iallocator.
6816
6817     self.all_nodes = [self.source_node, self.target_node]
6818     self.nodes_ip = {
6819       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6820       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6821       }
6822
6823     if self.cleanup:
6824       return self._ExecCleanup()
6825     else:
6826       return self._ExecMigration()
6827
6828
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Recursively create a device and its children on one node.

  The children are always visited first. The device itself is only
  created when creation is forced, which is the case either because
  the caller asked for it or because the device (or one of its
  ancestors) reports C{CreateOnSecondary()}.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; it is
      switched to True as soon as a device with a true
      C{CreateOnSecondary()} is met, and inherited by its children
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  for sub_device in device.children or []:
    _CreateBlockDev(lu, node, instance, sub_device, force_create,
                    info, force_open)

  if force_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6869
6870
6871 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6872   """Create a single block device on a given node.
6873
6874   This will not recurse over children of the device, so they must be
6875   created in advance.
6876
6877   @param lu: the lu on whose behalf we execute
6878   @param node: the node on which to create the device
6879   @type instance: L{objects.Instance}
6880   @param instance: the instance which owns the device
6881   @type device: L{objects.Disk}
6882   @param device: the device to create
6883   @param info: the extra 'metadata' we should attach to the device
6884       (this will be represented as a LVM tag)
6885   @type force_open: boolean
6886   @param force_open: this parameter will be passes to the
6887       L{backend.BlockdevCreate} function where it specifies
6888       whether we run on primary or not, and it affects both
6889       the child assembly and the device own Open() execution
6890
6891   """
6892   lu.cfg.SetDiskID(device, node)
6893   result = lu.rpc.call_blockdev_create(node, device, device.size,
6894                                        instance.name, force_open, info)
6895   result.Raise("Can't create block device %s on"
6896                " node %s for instance %s" % (device, node, instance.name))
6897   if device.physical_id is None:
6898     device.physical_id = result.payload
6899
6900
6901 def _GenerateUniqueNames(lu, exts):
6902   """Generate a suitable LV name.
6903
6904   This will generate a logical volume name for the given instance.
6905
6906   """
6907   results = []
6908   for val in exts:
6909     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6910     results.append("%s%s" % (new_id, val))
6911   return results
6912
6913
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are allocated through the configuration.
  The data volume uses C{names[0]} and the requested size, the
  metadata volume uses C{names[1]} and a fixed size of 128.

  @param lu: the lu on whose behalf we execute
  @param primary: first node of the drbd logical ID
  @param secondary: second node of the drbd logical ID
  @param size: size of the data volume and of the drbd device
  @param vgname: volume group for both logical volumes
  @param names: names for the data and metadata volumes (in this order)
  @param iv_name: the C{iv_name} of the resulting drbd device
  @param p_minor: minor placed after the port in the logical ID
  @param s_minor: minor placed after C{p_minor} in the logical ID
  @return: the drbd L{objects.Disk} with its children attached

  """
  port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  children = [
    objects.Disk(dev_type=constants.LD_LV, size=size,
                 logical_id=(vgname, names[0])),
    objects.Disk(dev_type=constants.LD_LV, size=128,
                 logical_id=(vgname, names[1])),
    ]
  drbd_id = (primary, secondary, port, p_minor, s_minor, secret)

  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=children,
                      iv_name=iv_name)
6932
6933
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  No device is created on any node here; only the L{objects.Disk}
  descriptions are built (allocating names, ports and minors through
  the configuration where the template needs them).

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template (one of the C{constants.DT_*}
      values handled below)
  @param instance_name: the instance name, passed to the DRBD minor
      allocation
  @param primary_node: the primary node (used for the drbd8 template)
  @param secondary_nodes: list of secondary nodes; must hold exactly
      one node for drbd8 and be empty for all other non-diskless
      templates
  @param disk_info: list of dicts, one per disk; the keys C{"size"} and
      C{"mode"} are read for every disk, C{"vg"} is optional (the
      cluster volume group is the fallback) and C{"adopt"} is read for
      the block template
  @param file_storage_dir: directory under which file-based disks are
      named C{disk<index>}
  @param file_driver: first element of the logical ID of file-based
      disks
  @param base_index: number added to the position of each disk to
      obtain its index (used in C{iv_name} and in generated names)
  @param feedback_fn: function used to report the generated names
      (only called for the plain template)
  @return: list of L{objects.Disk}, empty for the diskless template
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    # one logical volume per disk
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      # note: the position in disk_info is reported, not disk_index
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk are requested, alternating primary/remote node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # two LV names per disk: <prefix>_data followed by <prefix>_meta
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      # names and minors of disk idx live at positions 2*idx and 2*idx+1
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], vg, names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    # same layout as DT_FILE, only the storage check differs
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # the value of the disk's "adopt" key becomes part of the logical ID
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk["adopt"]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
7031
7032
7033 def _GetInstanceInfoText(instance):
7034   """Compute that text that should be added to the disk's metadata.
7035
7036   """
7037   return "originstname+%s" % instance.name
7038
7039
7040 def _CalcEta(time_taken, written, total_size):
7041   """Calculates the ETA based on size written and total size.
7042
7043   @param time_taken: The time taken so far
7044   @param written: amount written so far
7045   @param total_size: The total size of data to be written
7046   @return: The remaining time in seconds
7047
7048   """
7049   avg_time = time_taken / float(written)
7050   return (total_size - written) * avg_time
7051
7052
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  The sync of the instance's disks is paused on the primary node, each
  disk is wiped chunk by chunk through the C{blockdev_wipe} RPC, and
  the sync is resumed afterwards (also when wiping failed).

  This function has no return value; a failed wipe RPC is reported
  through the C{Raise} method of its result.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  # NOTE(review): the payload is read without checking the RPC result
  # for failure first -- confirm it is always iterable here
  for idx, success in enumerate(result.payload):
    if not success:
      # a failed pause is only logged, the wipe goes ahead regardless
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s",
                   idx, instance.name, node)

      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      # (the division by 100.0 can make this a float value)
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)

      offset = 0
      size = device.size
      # timestamp of the last progress message; starting at 0 means the
      # first finished chunk is normally reported right away
      last_output = 0
      start_time = time.time()

      while offset < size:
        # the last chunk is shortened to end exactly at the disk size
        wipe_size = min(wipe_chunk_size, size - offset)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report progress at most once every 60 seconds
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    # always try to resume the sync, even if wiping raised an error
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                      " look at the status and troubleshoot the issue.", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
7115
7116
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create the block devices of all disks of an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first. Nothing is
  returned; failures surface as exceptions raised by the RPC result
  checks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
      (the disks are then created on that node only)

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  file_templates = (constants.DT_FILE, constants.DT_SHARED_FILE)
  if instance.disk_template in file_templates:
    # all disks are expected in one directory, taken from the first disk
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # creation and opening are only forced on the primary node
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
7160
7161
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For the file disk template the storage directory is removed as well,
  on the same node the devices were removed from.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the given node is touched, and only the top-level device
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # failures are only recorded, the remaining devices are still tried
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the RPC was actually sent to, which differs from
      # the primary node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
7209
7210
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template the sizes are computed for
  @param disks: list of dicts, each with a C{"vg"} and a C{"size"} key
  @return: dict mapping volume group names to the space required in
      them (empty for templates that need no volume group space)
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  def _compute(disks, payload):
    """Universal algorithm

    Sums up, per volume group, the disk sizes plus C{payload} extra
    space for each disk.

    """
    vgs = {}
    for disk in disks:
      vg = disk["vg"]
      # accumulate per volume group: look up the disk's own VG name, so
      # that several disks in the same VG add up instead of overwriting
      vgs[vg] = vgs.get(vg, 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
7240
7241
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size required for a disk template.

  @type disk_template: string
  @param disk_template: the disk template of the instance (one of the
      C{constants.DT_*} values)
  @type disks: list of dicts
  @param disks: the disk definitions; every entry must provide the
      "size" key
  @return: the summed up size for the plain and drbd templates (the
      latter including the metadata overhead), C{None} for the
      diskless and file templates and zero for the shared file and
      block templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # the totals are always computed, whatever template was requested
  plain_total = sum([disk["size"] for disk in disks])
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum([disk["size"] + 128 for disk in disks])

  # Required free disk space as a function of disk and swap space
  size_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  try:
    return size_by_template[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
7262
7263
def _FilterVmNodes(lu, nodenames):
  """Drops the nodes which are not vm_capable from a list of nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the names of the nodes which should be filtered
  @rtype: list
  @return: the names from C{nodenames}, in their original order, which
      are not reported as non-vm_capable by the configuration

  """
  non_vm_capable = frozenset(lu.cfg.GetNonVmCapableNodeList())

  vm_capable = []
  for node_name in nodenames:
    if node_name in non_vm_capable:
      continue
    vm_capable.append(node_name)

  return vm_capable
7277
7278
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation, so that
  it can be shared by instance creation and instance modification.
  Nodes which are not vm_capable are not asked at all and the results
  of offline nodes are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  validation = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                      hvparams)
  for node_name in vm_nodes:
    node_result = validation[node_name]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node_name)
7305
7306
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  Nodes which are not vm_capable are skipped; a node returning an
  empty payload is only logged as not having the OS.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  validation = lu.rpc.call_os_validate(required, vm_nodes, osname, checks,
                                       osparams)
  for node_name, node_result in validation.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" %
                      node_name)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node_name)
7335
7336
7337 class LUInstanceCreate(LogicalUnit):
7338   """Create an instance.
7339
7340   """
7341   HPATH = "instance-add"
7342   HTYPE = constants.HTYPE_INSTANCE
7343   REQ_BGL = False
7344
7345   def CheckArguments(self):
7346     """Check arguments.
7347
7348     """
7349     # do not require name_check to ease forward/backward compatibility
7350     # for tools
7351     if self.op.no_install and self.op.start:
7352       self.LogInfo("No-installation mode selected, disabling startup")
7353       self.op.start = False
7354     # validate/normalize the instance name
7355     self.op.instance_name = \
7356       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7357
7358     if self.op.ip_check and not self.op.name_check:
7359       # TODO: make the ip check more flexible and not depend on the name check
7360       raise errors.OpPrereqError("Cannot do ip check without a name check",
7361                                  errors.ECODE_INVAL)
7362
7363     # check nics' parameter names
7364     for nic in self.op.nics:
7365       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7366
7367     # check disks. parameter names and consistent adopt/no-adopt strategy
7368     has_adopt = has_no_adopt = False
7369     for disk in self.op.disks:
7370       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7371       if "adopt" in disk:
7372         has_adopt = True
7373       else:
7374         has_no_adopt = True
7375     if has_adopt and has_no_adopt:
7376       raise errors.OpPrereqError("Either all disks are adopted or none is",
7377                                  errors.ECODE_INVAL)
7378     if has_adopt:
7379       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7380         raise errors.OpPrereqError("Disk adoption is not supported for the"
7381                                    " '%s' disk template" %
7382                                    self.op.disk_template,
7383                                    errors.ECODE_INVAL)
7384       if self.op.iallocator is not None:
7385         raise errors.OpPrereqError("Disk adoption not allowed with an"
7386                                    " iallocator script", errors.ECODE_INVAL)
7387       if self.op.mode == constants.INSTANCE_IMPORT:
7388         raise errors.OpPrereqError("Disk adoption not allowed for"
7389                                    " instance import", errors.ECODE_INVAL)
7390     else:
7391       if self.op.disk_template in constants.DTS_MUST_ADOPT:
7392         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7393                                    " but no 'adopt' parameter given" %
7394                                    self.op.disk_template,
7395                                    errors.ECODE_INVAL)
7396
7397     self.adopt_disks = has_adopt
7398
7399     # instance name verification
7400     if self.op.name_check:
7401       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7402       self.op.instance_name = self.hostname1.name
7403       # used in CheckPrereq for ip ping check
7404       self.check_ip = self.hostname1.ip
7405     else:
7406       self.check_ip = None
7407
7408     # file storage checks
7409     if (self.op.file_driver and
7410         not self.op.file_driver in constants.FILE_DRIVER):
7411       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7412                                  self.op.file_driver, errors.ECODE_INVAL)
7413
7414     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7415       raise errors.OpPrereqError("File storage directory path not absolute",
7416                                  errors.ECODE_INVAL)
7417
7418     ### Node/iallocator related checks
7419     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7420
7421     if self.op.pnode is not None:
7422       if self.op.disk_template in constants.DTS_INT_MIRROR:
7423         if self.op.snode is None:
7424           raise errors.OpPrereqError("The networked disk templates need"
7425                                      " a mirror node", errors.ECODE_INVAL)
7426       elif self.op.snode:
7427         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7428                         " template")
7429         self.op.snode = None
7430
7431     self._cds = _GetClusterDomainSecret()
7432
7433     if self.op.mode == constants.INSTANCE_IMPORT:
7434       # On import force_variant must be True, because if we forced it at
7435       # initial install, our only chance when importing it back is that it
7436       # works again!
7437       self.op.force_variant = True
7438
7439       if self.op.no_install:
7440         self.LogInfo("No-installation mode has no effect during import")
7441
7442     elif self.op.mode == constants.INSTANCE_CREATE:
7443       if self.op.os_type is None:
7444         raise errors.OpPrereqError("No guest OS specified",
7445                                    errors.ECODE_INVAL)
7446       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7447         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7448                                    " installation" % self.op.os_type,
7449                                    errors.ECODE_STATE)
7450       if self.op.disk_template is None:
7451         raise errors.OpPrereqError("No disk template specified",
7452                                    errors.ECODE_INVAL)
7453
7454     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7455       # Check handshake to ensure both clusters have the same domain secret
7456       src_handshake = self.op.source_handshake
7457       if not src_handshake:
7458         raise errors.OpPrereqError("Missing source handshake",
7459                                    errors.ECODE_INVAL)
7460
7461       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7462                                                            src_handshake)
7463       if errmsg:
7464         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7465                                    errors.ECODE_INVAL)
7466
7467       # Load and check source CA
7468       self.source_x509_ca_pem = self.op.source_x509_ca
7469       if not self.source_x509_ca_pem:
7470         raise errors.OpPrereqError("Missing source X509 CA",
7471                                    errors.ECODE_INVAL)
7472
7473       try:
7474         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7475                                                     self._cds)
7476       except OpenSSL.crypto.Error, err:
7477         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7478                                    (err, ), errors.ECODE_INVAL)
7479
7480       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7481       if errcode is not None:
7482         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7483                                    errors.ECODE_INVAL)
7484
7485       self.source_x509_ca = cert
7486
7487       src_instance_name = self.op.source_instance_name
7488       if not src_instance_name:
7489         raise errors.OpPrereqError("Missing source instance name",
7490                                    errors.ECODE_INVAL)
7491
7492       self.source_instance_name = \
7493           netutils.GetHostname(name=src_instance_name).name
7494
7495     else:
7496       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7497                                  self.op.mode, errors.ECODE_INVAL)
7498
7499   def ExpandNames(self):
7500     """ExpandNames for CreateInstance.
7501
7502     Figure out the right locks for instance creation.
7503
7504     """
7505     self.needed_locks = {}
7506
7507     instance_name = self.op.instance_name
7508     # this is just a preventive check, but someone might still add this
7509     # instance in the meantime, and creation will fail at lock-add time
7510     if instance_name in self.cfg.GetInstanceList():
7511       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7512                                  instance_name, errors.ECODE_EXISTS)
7513
7514     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7515
7516     if self.op.iallocator:
7517       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7518     else:
7519       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7520       nodelist = [self.op.pnode]
7521       if self.op.snode is not None:
7522         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7523         nodelist.append(self.op.snode)
7524       self.needed_locks[locking.LEVEL_NODE] = nodelist
7525
7526     # in case of import lock the source node too
7527     if self.op.mode == constants.INSTANCE_IMPORT:
7528       src_node = self.op.src_node
7529       src_path = self.op.src_path
7530
7531       if src_path is None:
7532         self.op.src_path = src_path = self.op.instance_name
7533
7534       if src_node is None:
7535         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7536         self.op.src_node = None
7537         if os.path.isabs(src_path):
7538           raise errors.OpPrereqError("Importing an instance from an absolute"
7539                                      " path requires a source node option.",
7540                                      errors.ECODE_INVAL)
7541       else:
7542         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7543         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7544           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7545         if not os.path.isabs(src_path):
7546           self.op.src_path = src_path = \
7547             utils.PathJoin(constants.EXPORT_DIR, src_path)
7548
7549   def _RunAllocator(self):
7550     """Run the allocator based on input opcode.
7551
7552     """
7553     nics = [n.ToDict() for n in self.nics]
7554     ial = IAllocator(self.cfg, self.rpc,
7555                      mode=constants.IALLOCATOR_MODE_ALLOC,
7556                      name=self.op.instance_name,
7557                      disk_template=self.op.disk_template,
7558                      tags=[],
7559                      os=self.op.os_type,
7560                      vcpus=self.be_full[constants.BE_VCPUS],
7561                      mem_size=self.be_full[constants.BE_MEMORY],
7562                      disks=self.disks,
7563                      nics=nics,
7564                      hypervisor=self.op.hypervisor,
7565                      )
7566
7567     ial.Run(self.op.iallocator)
7568
7569     if not ial.success:
7570       raise errors.OpPrereqError("Can't compute nodes using"
7571                                  " iallocator '%s': %s" %
7572                                  (self.op.iallocator, ial.info),
7573                                  errors.ECODE_NORES)
7574     if len(ial.result) != ial.required_nodes:
7575       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7576                                  " of nodes (%s), required %s" %
7577                                  (self.op.iallocator, len(ial.result),
7578                                   ial.required_nodes), errors.ECODE_FAULT)
7579     self.op.pnode = ial.result[0]
7580     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7581                  self.op.instance_name, self.op.iallocator,
7582                  utils.CommaJoin(ial.result))
7583     if ial.required_nodes == 2:
7584       self.op.snode = ial.result[1]
7585
7586   def BuildHooksEnv(self):
7587     """Build hooks env.
7588
7589     This runs on master, primary and secondary nodes of the instance.
7590
7591     """
7592     env = {
7593       "ADD_MODE": self.op.mode,
7594       }
7595     if self.op.mode == constants.INSTANCE_IMPORT:
7596       env["SRC_NODE"] = self.op.src_node
7597       env["SRC_PATH"] = self.op.src_path
7598       env["SRC_IMAGES"] = self.src_images
7599
7600     env.update(_BuildInstanceHookEnv(
7601       name=self.op.instance_name,
7602       primary_node=self.op.pnode,
7603       secondary_nodes=self.secondaries,
7604       status=self.op.start,
7605       os_type=self.op.os_type,
7606       memory=self.be_full[constants.BE_MEMORY],
7607       vcpus=self.be_full[constants.BE_VCPUS],
7608       nics=_NICListToTuple(self, self.nics),
7609       disk_template=self.op.disk_template,
7610       disks=[(d["size"], d["mode"]) for d in self.disks],
7611       bep=self.be_full,
7612       hvp=self.hv_full,
7613       hypervisor_name=self.op.hypervisor,
7614     ))
7615
7616     return env
7617
7618   def BuildHooksNodes(self):
7619     """Build hooks nodes.
7620
7621     """
7622     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7623     return nl, nl
7624
7625   def _ReadExportInfo(self):
7626     """Reads the export information from disk.
7627
7628     It will override the opcode source node and path with the actual
7629     information, if these two were not specified before.
7630
7631     @return: the export information
7632
7633     """
7634     assert self.op.mode == constants.INSTANCE_IMPORT
7635
7636     src_node = self.op.src_node
7637     src_path = self.op.src_path
7638
7639     if src_node is None:
7640       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7641       exp_list = self.rpc.call_export_list(locked_nodes)
7642       found = False
7643       for node in exp_list:
7644         if exp_list[node].fail_msg:
7645           continue
7646         if src_path in exp_list[node].payload:
7647           found = True
7648           self.op.src_node = src_node = node
7649           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7650                                                        src_path)
7651           break
7652       if not found:
7653         raise errors.OpPrereqError("No export found for relative path %s" %
7654                                     src_path, errors.ECODE_INVAL)
7655
7656     _CheckNodeOnline(self, src_node)
7657     result = self.rpc.call_export_info(src_node, src_path)
7658     result.Raise("No export or invalid export found in dir %s" % src_path)
7659
7660     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7661     if not export_info.has_section(constants.INISECT_EXP):
7662       raise errors.ProgrammerError("Corrupted export config",
7663                                    errors.ECODE_ENVIRON)
7664
7665     ei_version = export_info.get(constants.INISECT_EXP, "version")
7666     if (int(ei_version) != constants.EXPORT_VERSION):
7667       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7668                                  (ei_version, constants.EXPORT_VERSION),
7669                                  errors.ECODE_ENVIRON)
7670     return export_info
7671
7672   def _ReadExportParams(self, einfo):
7673     """Use export parameters as defaults.
7674
7675     In case the opcode doesn't specify (as in override) some instance
7676     parameters, then try to use them from the export information, if
7677     that declares them.
7678
7679     """
7680     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7681
7682     if self.op.disk_template is None:
7683       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7684         self.op.disk_template = einfo.get(constants.INISECT_INS,
7685                                           "disk_template")
7686       else:
7687         raise errors.OpPrereqError("No disk template specified and the export"
7688                                    " is missing the disk_template information",
7689                                    errors.ECODE_INVAL)
7690
7691     if not self.op.disks:
7692       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7693         disks = []
7694         # TODO: import the disk iv_name too
7695         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7696           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7697           disks.append({"size": disk_sz})
7698         self.op.disks = disks
7699       else:
7700         raise errors.OpPrereqError("No disk info specified and the export"
7701                                    " is missing the disk information",
7702                                    errors.ECODE_INVAL)
7703
7704     if (not self.op.nics and
7705         einfo.has_option(constants.INISECT_INS, "nic_count")):
7706       nics = []
7707       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7708         ndict = {}
7709         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7710           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7711           ndict[name] = v
7712         nics.append(ndict)
7713       self.op.nics = nics
7714
7715     if (self.op.hypervisor is None and
7716         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7717       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7718     if einfo.has_section(constants.INISECT_HYP):
7719       # use the export parameters but do not override the ones
7720       # specified by the user
7721       for name, value in einfo.items(constants.INISECT_HYP):
7722         if name not in self.op.hvparams:
7723           self.op.hvparams[name] = value
7724
7725     if einfo.has_section(constants.INISECT_BEP):
7726       # use the parameters, without overriding
7727       for name, value in einfo.items(constants.INISECT_BEP):
7728         if name not in self.op.beparams:
7729           self.op.beparams[name] = value
7730     else:
7731       # try to read the parameters old style, from the main section
7732       for name in constants.BES_PARAMETERS:
7733         if (name not in self.op.beparams and
7734             einfo.has_option(constants.INISECT_INS, name)):
7735           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7736
7737     if einfo.has_section(constants.INISECT_OSP):
7738       # use the parameters, without overriding
7739       for name, value in einfo.items(constants.INISECT_OSP):
7740         if name not in self.op.osparams:
7741           self.op.osparams[name] = value
7742
7743   def _RevertToDefaults(self, cluster):
7744     """Revert the instance parameters to the default values.
7745
7746     """
7747     # hvparams
7748     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7749     for name in self.op.hvparams.keys():
7750       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7751         del self.op.hvparams[name]
7752     # beparams
7753     be_defs = cluster.SimpleFillBE({})
7754     for name in self.op.beparams.keys():
7755       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7756         del self.op.beparams[name]
7757     # nic params
7758     nic_defs = cluster.SimpleFillNIC({})
7759     for nic in self.op.nics:
7760       for name in constants.NICS_PARAMETERS:
7761         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7762           del nic[name]
7763     # osparams
7764     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7765     for name in self.op.osparams.keys():
7766       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7767         del self.op.osparams[name]
7768
7769   def CheckPrereq(self):
7770     """Check prerequisites.
7771
7772     """
7773     if self.op.mode == constants.INSTANCE_IMPORT:
7774       export_info = self._ReadExportInfo()
7775       self._ReadExportParams(export_info)
7776
7777     if (not self.cfg.GetVGName() and
7778         self.op.disk_template not in constants.DTS_NOT_LVM):
7779       raise errors.OpPrereqError("Cluster does not support lvm-based"
7780                                  " instances", errors.ECODE_STATE)
7781
7782     if self.op.hypervisor is None:
7783       self.op.hypervisor = self.cfg.GetHypervisorType()
7784
7785     cluster = self.cfg.GetClusterInfo()
7786     enabled_hvs = cluster.enabled_hypervisors
7787     if self.op.hypervisor not in enabled_hvs:
7788       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7789                                  " cluster (%s)" % (self.op.hypervisor,
7790                                   ",".join(enabled_hvs)),
7791                                  errors.ECODE_STATE)
7792
7793     # check hypervisor parameter syntax (locally)
7794     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7795     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7796                                       self.op.hvparams)
7797     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7798     hv_type.CheckParameterSyntax(filled_hvp)
7799     self.hv_full = filled_hvp
7800     # check that we don't specify global parameters on an instance
7801     _CheckGlobalHvParams(self.op.hvparams)
7802
7803     # fill and remember the beparams dict
7804     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7805     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7806
7807     # build os parameters
7808     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7809
7810     # now that hvp/bep are in final format, let's reset to defaults,
7811     # if told to do so
7812     if self.op.identify_defaults:
7813       self._RevertToDefaults(cluster)
7814
7815     # NIC buildup
7816     self.nics = []
7817     for idx, nic in enumerate(self.op.nics):
7818       nic_mode_req = nic.get("mode", None)
7819       nic_mode = nic_mode_req
7820       if nic_mode is None:
7821         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7822
7823       # in routed mode, for the first nic, the default ip is 'auto'
7824       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7825         default_ip_mode = constants.VALUE_AUTO
7826       else:
7827         default_ip_mode = constants.VALUE_NONE
7828
7829       # ip validity checks
7830       ip = nic.get("ip", default_ip_mode)
7831       if ip is None or ip.lower() == constants.VALUE_NONE:
7832         nic_ip = None
7833       elif ip.lower() == constants.VALUE_AUTO:
7834         if not self.op.name_check:
7835           raise errors.OpPrereqError("IP address set to auto but name checks"
7836                                      " have been skipped",
7837                                      errors.ECODE_INVAL)
7838         nic_ip = self.hostname1.ip
7839       else:
7840         if not netutils.IPAddress.IsValid(ip):
7841           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7842                                      errors.ECODE_INVAL)
7843         nic_ip = ip
7844
7845       # TODO: check the ip address for uniqueness
7846       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7847         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7848                                    errors.ECODE_INVAL)
7849
7850       # MAC address verification
7851       mac = nic.get("mac", constants.VALUE_AUTO)
7852       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7853         mac = utils.NormalizeAndValidateMac(mac)
7854
7855         try:
7856           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7857         except errors.ReservationError:
7858           raise errors.OpPrereqError("MAC address %s already in use"
7859                                      " in cluster" % mac,
7860                                      errors.ECODE_NOTUNIQUE)
7861
7862       #  Build nic parameters
7863       link = nic.get(constants.INIC_LINK, None)
7864       nicparams = {}
7865       if nic_mode_req:
7866         nicparams[constants.NIC_MODE] = nic_mode_req
7867       if link:
7868         nicparams[constants.NIC_LINK] = link
7869
7870       check_params = cluster.SimpleFillNIC(nicparams)
7871       objects.NIC.CheckParameterSyntax(check_params)
7872       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7873
7874     # disk checks/pre-build
7875     self.disks = []
7876     for disk in self.op.disks:
7877       mode = disk.get("mode", constants.DISK_RDWR)
7878       if mode not in constants.DISK_ACCESS_SET:
7879         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7880                                    mode, errors.ECODE_INVAL)
7881       size = disk.get("size", None)
7882       if size is None:
7883         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7884       try:
7885         size = int(size)
7886       except (TypeError, ValueError):
7887         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7888                                    errors.ECODE_INVAL)
7889       vg = disk.get("vg", self.cfg.GetVGName())
7890       new_disk = {"size": size, "mode": mode, "vg": vg}
7891       if "adopt" in disk:
7892         new_disk["adopt"] = disk["adopt"]
7893       self.disks.append(new_disk)
7894
7895     if self.op.mode == constants.INSTANCE_IMPORT:
7896
7897       # Check that the new instance doesn't have less disks than the export
7898       instance_disks = len(self.disks)
7899       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7900       if instance_disks < export_disks:
7901         raise errors.OpPrereqError("Not enough disks to import."
7902                                    " (instance: %d, export: %d)" %
7903                                    (instance_disks, export_disks),
7904                                    errors.ECODE_INVAL)
7905
7906       disk_images = []
7907       for idx in range(export_disks):
7908         option = 'disk%d_dump' % idx
7909         if export_info.has_option(constants.INISECT_INS, option):
7910           # FIXME: are the old os-es, disk sizes, etc. useful?
7911           export_name = export_info.get(constants.INISECT_INS, option)
7912           image = utils.PathJoin(self.op.src_path, export_name)
7913           disk_images.append(image)
7914         else:
7915           disk_images.append(False)
7916
7917       self.src_images = disk_images
7918
7919       old_name = export_info.get(constants.INISECT_INS, 'name')
7920       try:
7921         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7922       except (TypeError, ValueError), err:
7923         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7924                                    " an integer: %s" % str(err),
7925                                    errors.ECODE_STATE)
7926       if self.op.instance_name == old_name:
7927         for idx, nic in enumerate(self.nics):
7928           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7929             nic_mac_ini = 'nic%d_mac' % idx
7930             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7931
7932     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7933
7934     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7935     if self.op.ip_check:
7936       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7937         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7938                                    (self.check_ip, self.op.instance_name),
7939                                    errors.ECODE_NOTUNIQUE)
7940
7941     #### mac address generation
7942     # By generating here the mac address both the allocator and the hooks get
7943     # the real final mac address rather than the 'auto' or 'generate' value.
7944     # There is a race condition between the generation and the instance object
7945     # creation, which means that we know the mac is valid now, but we're not
7946     # sure it will be when we actually add the instance. If things go bad
7947     # adding the instance will abort because of a duplicate mac, and the
7948     # creation job will fail.
7949     for nic in self.nics:
7950       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7951         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7952
7953     #### allocator run
7954
7955     if self.op.iallocator is not None:
7956       self._RunAllocator()
7957
7958     #### node related checks
7959
7960     # check primary node
7961     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7962     assert self.pnode is not None, \
7963       "Cannot retrieve locked node %s" % self.op.pnode
7964     if pnode.offline:
7965       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7966                                  pnode.name, errors.ECODE_STATE)
7967     if pnode.drained:
7968       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7969                                  pnode.name, errors.ECODE_STATE)
7970     if not pnode.vm_capable:
7971       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7972                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7973
7974     self.secondaries = []
7975
7976     # mirror node verification
7977     if self.op.disk_template in constants.DTS_INT_MIRROR:
7978       if self.op.snode == pnode.name:
7979         raise errors.OpPrereqError("The secondary node cannot be the"
7980                                    " primary node.", errors.ECODE_INVAL)
7981       _CheckNodeOnline(self, self.op.snode)
7982       _CheckNodeNotDrained(self, self.op.snode)
7983       _CheckNodeVmCapable(self, self.op.snode)
7984       self.secondaries.append(self.op.snode)
7985
7986     nodenames = [pnode.name] + self.secondaries
7987
7988     if not self.adopt_disks:
7989       # Check lv size requirements, if not adopting
7990       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7991       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7992
7993     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
7994       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7995       if len(all_lvs) != len(self.disks):
7996         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7997                                    errors.ECODE_INVAL)
7998       for lv_name in all_lvs:
7999         try:
8000           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8001           # to ReserveLV uses the same syntax
8002           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8003         except errors.ReservationError:
8004           raise errors.OpPrereqError("LV named %s used by another instance" %
8005                                      lv_name, errors.ECODE_NOTUNIQUE)
8006
8007       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8008       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8009
8010       node_lvs = self.rpc.call_lv_list([pnode.name],
8011                                        vg_names.payload.keys())[pnode.name]
8012       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8013       node_lvs = node_lvs.payload
8014
8015       delta = all_lvs.difference(node_lvs.keys())
8016       if delta:
8017         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8018                                    utils.CommaJoin(delta),
8019                                    errors.ECODE_INVAL)
8020       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8021       if online_lvs:
8022         raise errors.OpPrereqError("Online logical volumes found, cannot"
8023                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8024                                    errors.ECODE_STATE)
8025       # update the size of disk based on what is found
8026       for dsk in self.disks:
8027         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
8028
8029     elif self.op.disk_template == constants.DT_BLOCK:
8030       # Normalize and de-duplicate device paths
8031       all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks])
8032       if len(all_disks) != len(self.disks):
8033         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8034                                    errors.ECODE_INVAL)
8035       baddisks = [d for d in all_disks
8036                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8037       if baddisks:
8038         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8039                                    " cannot be adopted" %
8040                                    (", ".join(baddisks),
8041                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8042                                    errors.ECODE_INVAL)
8043
8044       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8045                                             list(all_disks))[pnode.name]
8046       node_disks.Raise("Cannot get block device information from node %s" %
8047                        pnode.name)
8048       node_disks = node_disks.payload
8049       delta = all_disks.difference(node_disks.keys())
8050       if delta:
8051         raise errors.OpPrereqError("Missing block device(s): %s" %
8052                                    utils.CommaJoin(delta),
8053                                    errors.ECODE_INVAL)
8054       for dsk in self.disks:
8055         dsk["size"] = int(float(node_disks[dsk["adopt"]]))
8056
8057     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8058
8059     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8060     # check OS parameters (remotely)
8061     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8062
8063     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8064
8065     # memory check on primary node
8066     if self.op.start:
8067       _CheckNodeFreeMemory(self, self.pnode.name,
8068                            "creating instance %s" % self.op.instance_name,
8069                            self.be_full[constants.BE_MEMORY],
8070                            self.op.hypervisor)
8071
8072     self.dry_run_result = list(nodenames)
8073
8074   def Exec(self, feedback_fn):
8075     """Create and add the instance to the cluster.
8076
8077     """
8078     instance = self.op.instance_name
8079     pnode_name = self.pnode.name
8080
8081     ht_kind = self.op.hypervisor
8082     if ht_kind in constants.HTS_REQ_PORT:
8083       network_port = self.cfg.AllocatePort()
8084     else:
8085       network_port = None
8086
8087     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8088       # this is needed because os.path.join does not accept None arguments
8089       if self.op.file_storage_dir is None:
8090         string_file_storage_dir = ""
8091       else:
8092         string_file_storage_dir = self.op.file_storage_dir
8093
8094       # build the full file storage dir path
8095       if self.op.disk_template == constants.DT_SHARED_FILE:
8096         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8097       else:
8098         get_fsd_fn = self.cfg.GetFileStorageDir
8099
8100       file_storage_dir = utils.PathJoin(get_fsd_fn(),
8101                                         string_file_storage_dir, instance)
8102     else:
8103       file_storage_dir = ""
8104
8105     disks = _GenerateDiskTemplate(self,
8106                                   self.op.disk_template,
8107                                   instance, pnode_name,
8108                                   self.secondaries,
8109                                   self.disks,
8110                                   file_storage_dir,
8111                                   self.op.file_driver,
8112                                   0,
8113                                   feedback_fn)
8114
8115     iobj = objects.Instance(name=instance, os=self.op.os_type,
8116                             primary_node=pnode_name,
8117                             nics=self.nics, disks=disks,
8118                             disk_template=self.op.disk_template,
8119                             admin_up=False,
8120                             network_port=network_port,
8121                             beparams=self.op.beparams,
8122                             hvparams=self.op.hvparams,
8123                             hypervisor=self.op.hypervisor,
8124                             osparams=self.op.osparams,
8125                             )
8126
8127     if self.adopt_disks:
8128       if self.op.disk_template == constants.DT_PLAIN:
8129         # rename LVs to the newly-generated names; we need to construct
8130         # 'fake' LV disks with the old data, plus the new unique_id
8131         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8132         rename_to = []
8133         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8134           rename_to.append(t_dsk.logical_id)
8135           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
8136           self.cfg.SetDiskID(t_dsk, pnode_name)
8137         result = self.rpc.call_blockdev_rename(pnode_name,
8138                                                zip(tmp_disks, rename_to))
8139         result.Raise("Failed to rename adoped LVs")
8140     else:
8141       feedback_fn("* creating instance disks...")
8142       try:
8143         _CreateDisks(self, iobj)
8144       except errors.OpExecError:
8145         self.LogWarning("Device creation failed, reverting...")
8146         try:
8147           _RemoveDisks(self, iobj)
8148         finally:
8149           self.cfg.ReleaseDRBDMinors(instance)
8150           raise
8151
8152       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8153         feedback_fn("* wiping instance disks...")
8154         try:
8155           _WipeDisks(self, iobj)
8156         except errors.OpExecError:
8157           self.LogWarning("Device wiping failed, reverting...")
8158           try:
8159             _RemoveDisks(self, iobj)
8160           finally:
8161             self.cfg.ReleaseDRBDMinors(instance)
8162             raise
8163
8164     feedback_fn("adding instance %s to cluster config" % instance)
8165
8166     self.cfg.AddInstance(iobj, self.proc.GetECId())
8167
8168     # Declare that we don't want to remove the instance lock anymore, as we've
8169     # added the instance to the config
8170     del self.remove_locks[locking.LEVEL_INSTANCE]
8171     # Unlock all the nodes
8172     if self.op.mode == constants.INSTANCE_IMPORT:
8173       nodes_keep = [self.op.src_node]
8174       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8175                        if node != self.op.src_node]
8176       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8177       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8178     else:
8179       self.context.glm.release(locking.LEVEL_NODE)
8180       del self.acquired_locks[locking.LEVEL_NODE]
8181
8182     if self.op.wait_for_sync:
8183       disk_abort = not _WaitForSync(self, iobj)
8184     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8185       # make sure the disks are not degraded (still sync-ing is ok)
8186       time.sleep(15)
8187       feedback_fn("* checking mirrors status")
8188       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8189     else:
8190       disk_abort = False
8191
8192     if disk_abort:
8193       _RemoveDisks(self, iobj)
8194       self.cfg.RemoveInstance(iobj.name)
8195       # Make sure the instance lock gets removed
8196       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8197       raise errors.OpExecError("There are some degraded disks for"
8198                                " this instance")
8199
8200     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8201       if self.op.mode == constants.INSTANCE_CREATE:
8202         if not self.op.no_install:
8203           feedback_fn("* running the instance OS create scripts...")
8204           # FIXME: pass debug option from opcode to backend
8205           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8206                                                  self.op.debug_level)
8207           result.Raise("Could not add os for instance %s"
8208                        " on node %s" % (instance, pnode_name))
8209
8210       elif self.op.mode == constants.INSTANCE_IMPORT:
8211         feedback_fn("* running the instance OS import scripts...")
8212
8213         transfers = []
8214
8215         for idx, image in enumerate(self.src_images):
8216           if not image:
8217             continue
8218
8219           # FIXME: pass debug option from opcode to backend
8220           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8221                                              constants.IEIO_FILE, (image, ),
8222                                              constants.IEIO_SCRIPT,
8223                                              (iobj.disks[idx], idx),
8224                                              None)
8225           transfers.append(dt)
8226
8227         import_result = \
8228           masterd.instance.TransferInstanceData(self, feedback_fn,
8229                                                 self.op.src_node, pnode_name,
8230                                                 self.pnode.secondary_ip,
8231                                                 iobj, transfers)
8232         if not compat.all(import_result):
8233           self.LogWarning("Some disks for instance %s on node %s were not"
8234                           " imported successfully" % (instance, pnode_name))
8235
8236       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8237         feedback_fn("* preparing remote import...")
8238         # The source cluster will stop the instance before attempting to make a
8239         # connection. In some cases stopping an instance can take a long time,
8240         # hence the shutdown timeout is added to the connection timeout.
8241         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8242                            self.op.source_shutdown_timeout)
8243         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8244
8245         assert iobj.primary_node == self.pnode.name
8246         disk_results = \
8247           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8248                                         self.source_x509_ca,
8249                                         self._cds, timeouts)
8250         if not compat.all(disk_results):
8251           # TODO: Should the instance still be started, even if some disks
8252           # failed to import (valid for local imports, too)?
8253           self.LogWarning("Some disks for instance %s on node %s were not"
8254                           " imported successfully" % (instance, pnode_name))
8255
8256         # Run rename script on newly imported instance
8257         assert iobj.name == instance
8258         feedback_fn("Running rename script for %s" % instance)
8259         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8260                                                    self.source_instance_name,
8261                                                    self.op.debug_level)
8262         if result.fail_msg:
8263           self.LogWarning("Failed to run rename script for %s on node"
8264                           " %s: %s" % (instance, pnode_name, result.fail_msg))
8265
8266       else:
8267         # also checked in the prereq part
8268         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8269                                      % self.op.mode)
8270
8271     if self.op.start:
8272       iobj.admin_up = True
8273       self.cfg.Update(iobj, feedback_fn)
8274       logging.info("Starting instance %s on node %s", instance, pnode_name)
8275       feedback_fn("* starting instance...")
8276       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8277       result.Raise("Could not start instance")
8278
8279     return list(iobj.all_nodes)
8280
8281
class LUInstanceConsole(NoHooksLU):
  """Return the data needed to connect to an instance's console.

  This LU is somewhat special: it does not change anything, it only
  returns the console information (see L{_GetInstanceConsole}) which the
  caller needs in order to connect to the console from the master node.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the cluster and its primary node must
    pass the online check.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Compute the console information of a running instance.

    @raise errors.OpExecError: if the instance is not reported as running
        by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      # Not running: the reported state depends on the configured admin state
      state = constants.INSTST_ADMINDOWN
      if inst.admin_up:
        state = constants.INSTST_ERRORDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (inst.name, state))

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    cluster = self.cfg.GetClusterInfo()
    return _GetInstanceConsole(cluster, inst)
8328
8329
def _GetInstanceConsole(cluster, instance):
  """Builds the console description of an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object, used to fill the instance parameters
  @type instance: L{objects.Instance}
  @param instance: instance whose console information is requested
  @rtype: dict
  @return: the console object as returned by its C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled parameters are handed over separately from the instance;
  # this avoids editing the instance and then saving the defaults in the
  # instance itself.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  cons = hv_impl.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity checks on what the hypervisor returned
  assert cons.instance == instance.name
  assert cons.Validate()

  return cons.ToDict()
8349
8350
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a L{TLReplaceDisks} tasklet; this class
  takes care of argument checking, locking and hooks.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      # The allocator may pick any node, hence all of them are locked
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is None:
      # Only the instance's own nodes are needed, computed in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    else:
      new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [new_secondary]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # Only the node level needs extra work here
    if level != locking.LEVEL_NODE:
      return

    # If all nodes are locked already there is nothing to declare
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return

    # Otherwise the instance's primary/secondary nodes have to be declared
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    inst = self.replacer.instance
    hooks_env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, inst))
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same node list is returned twice: master node, primary node and,
    if one was given, the new secondary node.

    """
    inst = self.replacer.instance
    hook_nodes = [
      self.cfg.GetMasterNode(),
      inst.primary_node,
      ]
    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)
    return hook_nodes, hook_nodes
8424
8425
8426 class TLReplaceDisks(Tasklet):
8427   """Replaces disks for an instance.
8428
8429   Note: Locking is not within the scope of this class.
8430
8431   """
8432   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8433                disks, delay_iallocator, early_release):
8434     """Initializes this class.
8435
8436     """
8437     Tasklet.__init__(self, lu)
8438
8439     # Parameters
8440     self.instance_name = instance_name
8441     self.mode = mode
8442     self.iallocator_name = iallocator_name
8443     self.remote_node = remote_node
8444     self.disks = disks
8445     self.delay_iallocator = delay_iallocator
8446     self.early_release = early_release
8447
8448     # Runtime data
8449     self.instance = None
8450     self.new_node = None
8451     self.target_node = None
8452     self.other_node = None
8453     self.remote_node_info = None
8454     self.node_secondary_ip = None
8455
8456   @staticmethod
8457   def CheckArguments(mode, remote_node, iallocator):
8458     """Helper function for users of this class.
8459
8460     """
8461     # check for valid parameter combination
8462     if mode == constants.REPLACE_DISK_CHG:
8463       if remote_node is None and iallocator is None:
8464         raise errors.OpPrereqError("When changing the secondary either an"
8465                                    " iallocator script must be used or the"
8466                                    " new node given", errors.ECODE_INVAL)
8467
8468       if remote_node is not None and iallocator is not None:
8469         raise errors.OpPrereqError("Give either the iallocator or the new"
8470                                    " secondary, not both", errors.ECODE_INVAL)
8471
8472     elif remote_node is not None or iallocator is not None:
8473       # Not replacing the secondary
8474       raise errors.OpPrereqError("The iallocator and new node options can"
8475                                  " only be used when changing the"
8476                                  " secondary node", errors.ECODE_INVAL)
8477
8478   @staticmethod
8479   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8480     """Compute a new secondary node using an IAllocator.
8481
8482     """
8483     ial = IAllocator(lu.cfg, lu.rpc,
8484                      mode=constants.IALLOCATOR_MODE_RELOC,
8485                      name=instance_name,
8486                      relocate_from=relocate_from)
8487
8488     ial.Run(iallocator_name)
8489
8490     if not ial.success:
8491       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8492                                  " %s" % (iallocator_name, ial.info),
8493                                  errors.ECODE_NORES)
8494
8495     if len(ial.result) != ial.required_nodes:
8496       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8497                                  " of nodes (%s), required %s" %
8498                                  (iallocator_name,
8499                                   len(ial.result), ial.required_nodes),
8500                                  errors.ECODE_FAULT)
8501
8502     remote_node_name = ial.result[0]
8503
8504     lu.LogInfo("Selected new secondary for instance '%s': %s",
8505                instance_name, remote_node_name)
8506
8507     return remote_node_name
8508
8509   def _FindFaultyDisks(self, node_name):
8510     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8511                                     node_name, True)
8512
8513   def _CheckDisksActivated(self, instance):
8514     """Checks if the instance disks are activated.
8515
8516     @param instance: The instance to check disks
8517     @return: True if they are activated, False otherwise
8518
8519     """
8520     nodes = instance.all_nodes
8521
8522     for idx, dev in enumerate(instance.disks):
8523       for node in nodes:
8524         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8525         self.cfg.SetDiskID(dev, node)
8526
8527         result = self.rpc.call_blockdev_find(node, dev)
8528
8529         if result.offline:
8530           continue
8531         elif result.fail_msg or not result.payload:
8532           return False
8533
8534     return True
8535
8536
8537   def CheckPrereq(self):
8538     """Check prerequisites.
8539
8540     This checks that the instance is in the cluster.
8541
8542     """
8543     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8544     assert instance is not None, \
8545       "Cannot retrieve locked instance %s" % self.instance_name
8546
8547     if instance.disk_template != constants.DT_DRBD8:
8548       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8549                                  " instances", errors.ECODE_INVAL)
8550
8551     if len(instance.secondary_nodes) != 1:
8552       raise errors.OpPrereqError("The instance has a strange layout,"
8553                                  " expected one secondary but found %d" %
8554                                  len(instance.secondary_nodes),
8555                                  errors.ECODE_FAULT)
8556
8557     if not self.delay_iallocator:
8558       self._CheckPrereq2()
8559
8560   def _CheckPrereq2(self):
8561     """Check prerequisites, second part.
8562
8563     This function should always be part of CheckPrereq. It was separated and is
8564     now called from Exec because during node evacuation iallocator was only
8565     called with an unmodified cluster model, not taking planned changes into
8566     account.
8567
8568     """
8569     instance = self.instance
8570     secondary_node = instance.secondary_nodes[0]
8571
8572     if self.iallocator_name is None:
8573       remote_node = self.remote_node
8574     else:
8575       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8576                                        instance.name, instance.secondary_nodes)
8577
8578     if remote_node is not None:
8579       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8580       assert self.remote_node_info is not None, \
8581         "Cannot retrieve locked node %s" % remote_node
8582     else:
8583       self.remote_node_info = None
8584
8585     if remote_node == self.instance.primary_node:
8586       raise errors.OpPrereqError("The specified node is the primary node of"
8587                                  " the instance.", errors.ECODE_INVAL)
8588
8589     if remote_node == secondary_node:
8590       raise errors.OpPrereqError("The specified node is already the"
8591                                  " secondary node of the instance.",
8592                                  errors.ECODE_INVAL)
8593
8594     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8595                                     constants.REPLACE_DISK_CHG):
8596       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8597                                  errors.ECODE_INVAL)
8598
8599     if self.mode == constants.REPLACE_DISK_AUTO:
8600       if not self._CheckDisksActivated(instance):
8601         raise errors.OpPrereqError("Please run activate-disks on instance %s"
8602                                    " first" % self.instance_name,
8603                                    errors.ECODE_STATE)
8604       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8605       faulty_secondary = self._FindFaultyDisks(secondary_node)
8606
8607       if faulty_primary and faulty_secondary:
8608         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8609                                    " one node and can not be repaired"
8610                                    " automatically" % self.instance_name,
8611                                    errors.ECODE_STATE)
8612
8613       if faulty_primary:
8614         self.disks = faulty_primary
8615         self.target_node = instance.primary_node
8616         self.other_node = secondary_node
8617         check_nodes = [self.target_node, self.other_node]
8618       elif faulty_secondary:
8619         self.disks = faulty_secondary
8620         self.target_node = secondary_node
8621         self.other_node = instance.primary_node
8622         check_nodes = [self.target_node, self.other_node]
8623       else:
8624         self.disks = []
8625         check_nodes = []
8626
8627     else:
8628       # Non-automatic modes
8629       if self.mode == constants.REPLACE_DISK_PRI:
8630         self.target_node = instance.primary_node
8631         self.other_node = secondary_node
8632         check_nodes = [self.target_node, self.other_node]
8633
8634       elif self.mode == constants.REPLACE_DISK_SEC:
8635         self.target_node = secondary_node
8636         self.other_node = instance.primary_node
8637         check_nodes = [self.target_node, self.other_node]
8638
8639       elif self.mode == constants.REPLACE_DISK_CHG:
8640         self.new_node = remote_node
8641         self.other_node = instance.primary_node
8642         self.target_node = secondary_node
8643         check_nodes = [self.new_node, self.other_node]
8644
8645         _CheckNodeNotDrained(self.lu, remote_node)
8646         _CheckNodeVmCapable(self.lu, remote_node)
8647
8648         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8649         assert old_node_info is not None
8650         if old_node_info.offline and not self.early_release:
8651           # doesn't make sense to delay the release
8652           self.early_release = True
8653           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8654                           " early-release mode", secondary_node)
8655
8656       else:
8657         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8658                                      self.mode)
8659
8660       # If not specified all disks should be replaced
8661       if not self.disks:
8662         self.disks = range(len(self.instance.disks))
8663
8664     for node in check_nodes:
8665       _CheckNodeOnline(self.lu, node)
8666
8667     # Check whether disks are valid
8668     for disk_idx in self.disks:
8669       instance.FindDisk(disk_idx)
8670
8671     # Get secondary node IP addresses
8672     node_2nd_ip = {}
8673
8674     for node_name in [self.target_node, self.other_node, self.new_node]:
8675       if node_name is not None:
8676         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8677
8678     self.node_secondary_ip = node_2nd_ip
8679
8680   def Exec(self, feedback_fn):
8681     """Execute disk replacement.
8682
8683     This dispatches the disk replacement to the appropriate handler.
8684
8685     """
8686     if self.delay_iallocator:
8687       self._CheckPrereq2()
8688
8689     if not self.disks:
8690       feedback_fn("No disks need replacement")
8691       return
8692
8693     feedback_fn("Replacing disk(s) %s for %s" %
8694                 (utils.CommaJoin(self.disks), self.instance.name))
8695
8696     activate_disks = (not self.instance.admin_up)
8697
8698     # Activate the instance disks if we're replacing them on a down instance
8699     if activate_disks:
8700       _StartInstanceDisks(self.lu, self.instance, True)
8701
8702     try:
8703       # Should we replace the secondary node?
8704       if self.new_node is not None:
8705         fn = self._ExecDrbd8Secondary
8706       else:
8707         fn = self._ExecDrbd8DiskOnly
8708
8709       return fn(feedback_fn)
8710
8711     finally:
8712       # Deactivate the instance disks if we're replacing them on a
8713       # down instance
8714       if activate_disks:
8715         _SafeShutdownInstanceDisks(self.lu, self.instance)
8716
8717   def _CheckVolumeGroup(self, nodes):
8718     self.lu.LogInfo("Checking volume groups")
8719
8720     vgname = self.cfg.GetVGName()
8721
8722     # Make sure volume group exists on all involved nodes
8723     results = self.rpc.call_vg_list(nodes)
8724     if not results:
8725       raise errors.OpExecError("Can't list volume groups on the nodes")
8726
8727     for node in nodes:
8728       res = results[node]
8729       res.Raise("Error checking node %s" % node)
8730       if vgname not in res.payload:
8731         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8732                                  (vgname, node))
8733
8734   def _CheckDisksExistence(self, nodes):
8735     # Check disk existence
8736     for idx, dev in enumerate(self.instance.disks):
8737       if idx not in self.disks:
8738         continue
8739
8740       for node in nodes:
8741         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8742         self.cfg.SetDiskID(dev, node)
8743
8744         result = self.rpc.call_blockdev_find(node, dev)
8745
8746         msg = result.fail_msg
8747         if msg or not result.payload:
8748           if not msg:
8749             msg = "disk not found"
8750           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8751                                    (idx, node, msg))
8752
8753   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8754     for idx, dev in enumerate(self.instance.disks):
8755       if idx not in self.disks:
8756         continue
8757
8758       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8759                       (idx, node_name))
8760
8761       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8762                                    ldisk=ldisk):
8763         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8764                                  " replace disks for instance %s" %
8765                                  (node_name, self.instance.name))
8766
8767   def _CreateNewStorage(self, node_name):
8768     vgname = self.cfg.GetVGName()
8769     iv_names = {}
8770
8771     for idx, dev in enumerate(self.instance.disks):
8772       if idx not in self.disks:
8773         continue
8774
8775       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8776
8777       self.cfg.SetDiskID(dev, node_name)
8778
8779       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8780       names = _GenerateUniqueNames(self.lu, lv_names)
8781
8782       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8783                              logical_id=(vgname, names[0]))
8784       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8785                              logical_id=(vgname, names[1]))
8786
8787       new_lvs = [lv_data, lv_meta]
8788       old_lvs = dev.children
8789       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8790
8791       # we pass force_create=True to force the LVM creation
8792       for new_lv in new_lvs:
8793         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8794                         _GetInstanceInfoText(self.instance), False)
8795
8796     return iv_names
8797
8798   def _CheckDevices(self, node_name, iv_names):
8799     for name, (dev, _, _) in iv_names.iteritems():
8800       self.cfg.SetDiskID(dev, node_name)
8801
8802       result = self.rpc.call_blockdev_find(node_name, dev)
8803
8804       msg = result.fail_msg
8805       if msg or not result.payload:
8806         if not msg:
8807           msg = "disk not found"
8808         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8809                                  (name, msg))
8810
8811       if result.payload.is_degraded:
8812         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8813
8814   def _RemoveOldStorage(self, node_name, iv_names):
8815     for name, (_, old_lvs, _) in iv_names.iteritems():
8816       self.lu.LogInfo("Remove logical volumes for %s" % name)
8817
8818       for lv in old_lvs:
8819         self.cfg.SetDiskID(lv, node_name)
8820
8821         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8822         if msg:
8823           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8824                              hint="remove unused LVs manually")
8825
8826   def _ReleaseNodeLock(self, node_name):
8827     """Releases the lock for a given node."""
8828     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8829
8830   def _ExecDrbd8DiskOnly(self, feedback_fn):
8831     """Replace a disk on the primary or secondary for DRBD 8.
8832
8833     The algorithm for replace is quite complicated:
8834
8835       1. for each disk to be replaced:
8836
8837         1. create new LVs on the target node with unique names
8838         1. detach old LVs from the drbd device
8839         1. rename old LVs to name_replaced.<time_t>
8840         1. rename new LVs to old LVs
8841         1. attach the new LVs (with the old names now) to the drbd device
8842
8843       1. wait for sync across all devices
8844
8845       1. for each modified disk:
8846
8847         1. remove old LVs (which have the name name_replaces.<time_t>)
8848
8849     Failures are not very well handled.
8850
8851     """
8852     steps_total = 6
8853
8854     # Step: check device activation
8855     self.lu.LogStep(1, steps_total, "Check device existence")
8856     self._CheckDisksExistence([self.other_node, self.target_node])
8857     self._CheckVolumeGroup([self.target_node, self.other_node])
8858
8859     # Step: check other node consistency
8860     self.lu.LogStep(2, steps_total, "Check peer consistency")
8861     self._CheckDisksConsistency(self.other_node,
8862                                 self.other_node == self.instance.primary_node,
8863                                 False)
8864
8865     # Step: create new storage
8866     self.lu.LogStep(3, steps_total, "Allocate new storage")
8867     iv_names = self._CreateNewStorage(self.target_node)
8868
8869     # Step: for each lv, detach+rename*2+attach
8870     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8871     for dev, old_lvs, new_lvs in iv_names.itervalues():
8872       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8873
8874       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8875                                                      old_lvs)
8876       result.Raise("Can't detach drbd from local storage on node"
8877                    " %s for device %s" % (self.target_node, dev.iv_name))
8878       #dev.children = []
8879       #cfg.Update(instance)
8880
8881       # ok, we created the new LVs, so now we know we have the needed
8882       # storage; as such, we proceed on the target node to rename
8883       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8884       # using the assumption that logical_id == physical_id (which in
8885       # turn is the unique_id on that node)
8886
8887       # FIXME(iustin): use a better name for the replaced LVs
8888       temp_suffix = int(time.time())
8889       ren_fn = lambda d, suff: (d.physical_id[0],
8890                                 d.physical_id[1] + "_replaced-%s" % suff)
8891
8892       # Build the rename list based on what LVs exist on the node
8893       rename_old_to_new = []
8894       for to_ren in old_lvs:
8895         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8896         if not result.fail_msg and result.payload:
8897           # device exists
8898           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8899
8900       self.lu.LogInfo("Renaming the old LVs on the target node")
8901       result = self.rpc.call_blockdev_rename(self.target_node,
8902                                              rename_old_to_new)
8903       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8904
8905       # Now we rename the new LVs to the old LVs
8906       self.lu.LogInfo("Renaming the new LVs on the target node")
8907       rename_new_to_old = [(new, old.physical_id)
8908                            for old, new in zip(old_lvs, new_lvs)]
8909       result = self.rpc.call_blockdev_rename(self.target_node,
8910                                              rename_new_to_old)
8911       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8912
8913       for old, new in zip(old_lvs, new_lvs):
8914         new.logical_id = old.logical_id
8915         self.cfg.SetDiskID(new, self.target_node)
8916
8917       for disk in old_lvs:
8918         disk.logical_id = ren_fn(disk, temp_suffix)
8919         self.cfg.SetDiskID(disk, self.target_node)
8920
8921       # Now that the new lvs have the old name, we can add them to the device
8922       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8923       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8924                                                   new_lvs)
8925       msg = result.fail_msg
8926       if msg:
8927         for new_lv in new_lvs:
8928           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8929                                                new_lv).fail_msg
8930           if msg2:
8931             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8932                                hint=("cleanup manually the unused logical"
8933                                      "volumes"))
8934         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8935
8936       dev.children = new_lvs
8937
8938       self.cfg.Update(self.instance, feedback_fn)
8939
8940     cstep = 5
8941     if self.early_release:
8942       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8943       cstep += 1
8944       self._RemoveOldStorage(self.target_node, iv_names)
8945       # WARNING: we release both node locks here, do not do other RPCs
8946       # than WaitForSync to the primary node
8947       self._ReleaseNodeLock([self.target_node, self.other_node])
8948
8949     # Wait for sync
8950     # This can fail as the old devices are degraded and _WaitForSync
8951     # does a combined result over all disks, so we don't check its return value
8952     self.lu.LogStep(cstep, steps_total, "Sync devices")
8953     cstep += 1
8954     _WaitForSync(self.lu, self.instance)
8955
8956     # Check all devices manually
8957     self._CheckDevices(self.instance.primary_node, iv_names)
8958
8959     # Step: remove old storage
8960     if not self.early_release:
8961       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8962       cstep += 1
8963       self._RemoveOldStorage(self.target_node, iv_names)
8964
8965   def _ExecDrbd8Secondary(self, feedback_fn):
8966     """Replace the secondary node for DRBD 8.
8967
8968     The algorithm for replace is quite complicated:
8969       - for all disks of the instance:
8970         - create new LVs on the new node with same names
8971         - shutdown the drbd device on the old secondary
8972         - disconnect the drbd network on the primary
8973         - create the drbd device on the new secondary
8974         - network attach the drbd on the primary, using an artifice:
8975           the drbd code for Attach() will connect to the network if it
8976           finds a device which is connected to the good local disks but
8977           not network enabled
8978       - wait for sync across all devices
8979       - remove all disks from the old secondary
8980
8981     Failures are not very well handled.
8982
8983     """
8984     steps_total = 6
8985
8986     # Step: check device activation
8987     self.lu.LogStep(1, steps_total, "Check device existence")
8988     self._CheckDisksExistence([self.instance.primary_node])
8989     self._CheckVolumeGroup([self.instance.primary_node])
8990
8991     # Step: check other node consistency
8992     self.lu.LogStep(2, steps_total, "Check peer consistency")
8993     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8994
8995     # Step: create new storage
8996     self.lu.LogStep(3, steps_total, "Allocate new storage")
8997     for idx, dev in enumerate(self.instance.disks):
8998       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8999                       (self.new_node, idx))
9000       # we pass force_create=True to force LVM creation
9001       for new_lv in dev.children:
9002         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9003                         _GetInstanceInfoText(self.instance), False)
9004
9005     # Step 4: dbrd minors and drbd setups changes
9006     # after this, we must manually remove the drbd minors on both the
9007     # error and the success paths
9008     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9009     minors = self.cfg.AllocateDRBDMinor([self.new_node
9010                                          for dev in self.instance.disks],
9011                                         self.instance.name)
9012     logging.debug("Allocated minors %r", minors)
9013
9014     iv_names = {}
9015     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9016       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9017                       (self.new_node, idx))
9018       # create new devices on new_node; note that we create two IDs:
9019       # one without port, so the drbd will be activated without
9020       # networking information on the new node at this stage, and one
9021       # with network, for the latter activation in step 4
9022       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9023       if self.instance.primary_node == o_node1:
9024         p_minor = o_minor1
9025       else:
9026         assert self.instance.primary_node == o_node2, "Three-node instance?"
9027         p_minor = o_minor2
9028
9029       new_alone_id = (self.instance.primary_node, self.new_node, None,
9030                       p_minor, new_minor, o_secret)
9031       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9032                     p_minor, new_minor, o_secret)
9033
9034       iv_names[idx] = (dev, dev.children, new_net_id)
9035       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9036                     new_net_id)
9037       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9038                               logical_id=new_alone_id,
9039                               children=dev.children,
9040                               size=dev.size)
9041       try:
9042         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9043                               _GetInstanceInfoText(self.instance), False)
9044       except errors.GenericError:
9045         self.cfg.ReleaseDRBDMinors(self.instance.name)
9046         raise
9047
9048     # We have new devices, shutdown the drbd on the old secondary
9049     for idx, dev in enumerate(self.instance.disks):
9050       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9051       self.cfg.SetDiskID(dev, self.target_node)
9052       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9053       if msg:
9054         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9055                            "node: %s" % (idx, msg),
9056                            hint=("Please cleanup this device manually as"
9057                                  " soon as possible"))
9058
9059     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9060     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9061                                                self.node_secondary_ip,
9062                                                self.instance.disks)\
9063                                               [self.instance.primary_node]
9064
9065     msg = result.fail_msg
9066     if msg:
9067       # detaches didn't succeed (unlikely)
9068       self.cfg.ReleaseDRBDMinors(self.instance.name)
9069       raise errors.OpExecError("Can't detach the disks from the network on"
9070                                " old node: %s" % (msg,))
9071
9072     # if we managed to detach at least one, we update all the disks of
9073     # the instance to point to the new secondary
9074     self.lu.LogInfo("Updating instance configuration")
9075     for dev, _, new_logical_id in iv_names.itervalues():
9076       dev.logical_id = new_logical_id
9077       self.cfg.SetDiskID(dev, self.instance.primary_node)
9078
9079     self.cfg.Update(self.instance, feedback_fn)
9080
9081     # and now perform the drbd attach
9082     self.lu.LogInfo("Attaching primary drbds to new secondary"
9083                     " (standalone => connected)")
9084     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9085                                             self.new_node],
9086                                            self.node_secondary_ip,
9087                                            self.instance.disks,
9088                                            self.instance.name,
9089                                            False)
9090     for to_node, to_result in result.items():
9091       msg = to_result.fail_msg
9092       if msg:
9093         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9094                            to_node, msg,
9095                            hint=("please do a gnt-instance info to see the"
9096                                  " status of disks"))
9097     cstep = 5
9098     if self.early_release:
9099       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9100       cstep += 1
9101       self._RemoveOldStorage(self.target_node, iv_names)
9102       # WARNING: we release all node locks here, do not do other RPCs
9103       # than WaitForSync to the primary node
9104       self._ReleaseNodeLock([self.instance.primary_node,
9105                              self.target_node,
9106                              self.new_node])
9107
9108     # Wait for sync
9109     # This can fail as the old devices are degraded and _WaitForSync
9110     # does a combined result over all disks, so we don't check its return value
9111     self.lu.LogStep(cstep, steps_total, "Sync devices")
9112     cstep += 1
9113     _WaitForSync(self.lu, self.instance)
9114
9115     # Check all devices manually
9116     self._CheckDevices(self.instance.primary_node, iv_names)
9117
9118     # Step: remove old storage
9119     if not self.early_release:
9120       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9121       self._RemoveOldStorage(self.target_node, iv_names)
9122
9123
9124 class LURepairNodeStorage(NoHooksLU):
9125   """Repairs the volume group on a node.
9126
9127   """
9128   REQ_BGL = False
9129
9130   def CheckArguments(self):
9131     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9132
9133     storage_type = self.op.storage_type
9134
9135     if (constants.SO_FIX_CONSISTENCY not in
9136         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9137       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9138                                  " repaired" % storage_type,
9139                                  errors.ECODE_INVAL)
9140
9141   def ExpandNames(self):
9142     self.needed_locks = {
9143       locking.LEVEL_NODE: [self.op.node_name],
9144       }
9145
9146   def _CheckFaultyDisks(self, instance, node_name):
9147     """Ensure faulty disks abort the opcode or at least warn."""
9148     try:
9149       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9150                                   node_name, True):
9151         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9152                                    " node '%s'" % (instance.name, node_name),
9153                                    errors.ECODE_STATE)
9154     except errors.OpPrereqError, err:
9155       if self.op.ignore_consistency:
9156         self.proc.LogWarning(str(err.args[0]))
9157       else:
9158         raise
9159
9160   def CheckPrereq(self):
9161     """Check prerequisites.
9162
9163     """
9164     # Check whether any instance on this node has faulty disks
9165     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9166       if not inst.admin_up:
9167         continue
9168       check_nodes = set(inst.all_nodes)
9169       check_nodes.discard(self.op.node_name)
9170       for inst_node_name in check_nodes:
9171         self._CheckFaultyDisks(inst, inst_node_name)
9172
9173   def Exec(self, feedback_fn):
9174     feedback_fn("Repairing storage unit '%s' on %s ..." %
9175                 (self.op.name, self.op.node_name))
9176
9177     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9178     result = self.rpc.call_storage_execute(self.op.node_name,
9179                                            self.op.storage_type, st_args,
9180                                            self.op.name,
9181                                            constants.SO_FIX_CONSISTENCY)
9182     result.Raise("Failed to repair storage unit '%s' on %s" %
9183                  (self.op.name, self.op.node_name))
9184
9185
class LUNodeEvacStrategy(NoHooksLU):
  """Logical unit computing how to evacuate a set of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the iallocator/remote node arguments.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node names and declare the needed node locks.

    Without a remote node all node locks are requested, otherwise only
    those of the evacuated nodes and of the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    locks = {}
    self.needed_locks = locks
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
    else:
      locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    @param feedback_fn: unused
    @return: with a remote node, a list of C{[instance name, remote node]}
        pairs for the secondary instances of the evacuated nodes;
        otherwise the result computed by the iallocator

    """
    remote_node = self.op.remote_node

    if remote_node is None:
      # No target given, let the iallocator compute the solution
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # All instances are collected first and only then validated
    instances = []
    for node in self.op.nodes:
      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))

    result = []
    for inst in instances:
      if inst.primary_node == remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (remote_node, inst.name),
                                   errors.ECODE_INVAL)
      result.append([inst.name, remote_node])
    return result
9228
9229
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit growing one disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks of the instance's nodes at node level.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the disk and the amount from the opcode, updated
    with the instance's hook environment.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
    env.update(instance_env)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple with the same list (the master node followed by all the
        instance's nodes) as both elements

    """
    nl = [self.cfg.GetMasterNode()]
    nl = nl + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its nodes are
    online, that its disk template is growable and that the disk exists.
    For templates other than the file-based ones, the free disk space on
    the nodes is checked as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    nodenames = list(instance.all_nodes)
    for node_name in nodenames:
      _CheckNodeOnline(self, node_name)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    file_templates = (constants.DT_FILE, constants.DT_SHARED_FILE)
    if instance.disk_template not in file_templates:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      growth = self.disk.ComputeGrowth(self.op.amount)
      _CheckNodesFreeDiskPerVG(self, nodenames, growth)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is assembled, grown on every node of the instance and the new
    size is recorded in the configuration. Unless waiting for the sync was
    requested, the disk of a stopped instance is left active.

    @param feedback_fn: feedback function, passed on to the configuration
        update
    @raise errors.OpExecError: if the disk cannot be assembled

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node_name in instance.all_nodes:
      self.cfg.SetDiskID(disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, disk,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    if not self.op.wait_for_sync:
      if not instance.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested.")
      return

    synced = _WaitForSync(self, instance, disks=[disk])
    if not synced:
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
    # The disk was activated only for growing it, so deactivate it again
    if not instance.admin_up:
      _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9331
9332
class LUInstanceQueryData(NoHooksLU):
  """Logical unit returning configuration and runtime data of instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance locks, either for the given names or for all.

    The value 1 is set as share flag for every locking level.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not self.op.instances:
      # No names given, the list is filled in once the locks are acquired
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in self.op.instances]
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks of the instances' nodes at node level.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This resolves the wanted instance names (falling back to the acquired
    instance locks if no names were given) to their configuration objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    wanted = []
    for name in self.wanted_names:
      wanted.append(self.cfg.GetInstanceInfo(name))
    self.wanted_instances = wanted

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Return the status of a block device on a node.

    @param node: name of the node to query; may be empty or None
    @param instance_name: instance name, only used in the error message
    @param dev: the disk object to look up
    @return: None for static queries, if no node is given, if the node is
        offline or if no status was returned; otherwise a tuple of
        (dev_path, major, minor, sync_percent, estimated_time,
        is_degraded, ldisk_status)

    """
    status = None

    if not self.op.static and node:
      self.cfg.SetDiskID(dev, node)

      lookup = self.rpc.call_blockdev_find(node, dev)
      if not lookup.offline:
        lookup.Raise("Can't compute disk status for %s" % instance_name)
        status = lookup.payload

    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute the status of a disk and, recursively, of its children.

    @param instance: the instance owning the disk
    @param snode: name of the secondary node; for DRBD devices it is
        replaced by the non-primary node taken from the logical ID
    @param dev: the disk object to describe
    @return: dict describing the disk

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children = []
    if dev.children:
      children = [self._ComputeDiskStatus(instance, snode, child)
                  for child in dev.children]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @param feedback_fn: unused
    @return: dict mapping each instance name to a dict with its data

    """
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if self.op.static:
        # Static queries do not contact the nodes
        remote_state = None
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        payload = remote_info.payload
        if payload and "state" in payload:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = []
      for device in instance.disks:
        disks.append(self._ComputeDiskStatus(instance, None, device))

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
9485
9486
9487 class LUInstanceSetParams(LogicalUnit):
9488   """Modifies an instances's parameters.
9489
9490   """
9491   HPATH = "instance-modify"
9492   HTYPE = constants.HTYPE_INSTANCE
9493   REQ_BGL = False
9494
9495   def CheckArguments(self):
9496     if not (self.op.nics or self.op.disks or self.op.disk_template or
9497             self.op.hvparams or self.op.beparams or self.op.os_name):
9498       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9499
9500     if self.op.hvparams:
9501       _CheckGlobalHvParams(self.op.hvparams)
9502
9503     # Disk validation
9504     disk_addremove = 0
9505     for disk_op, disk_dict in self.op.disks:
9506       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9507       if disk_op == constants.DDM_REMOVE:
9508         disk_addremove += 1
9509         continue
9510       elif disk_op == constants.DDM_ADD:
9511         disk_addremove += 1
9512       else:
9513         if not isinstance(disk_op, int):
9514           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9515         if not isinstance(disk_dict, dict):
9516           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9517           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9518
9519       if disk_op == constants.DDM_ADD:
9520         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9521         if mode not in constants.DISK_ACCESS_SET:
9522           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9523                                      errors.ECODE_INVAL)
9524         size = disk_dict.get('size', None)
9525         if size is None:
9526           raise errors.OpPrereqError("Required disk parameter size missing",
9527                                      errors.ECODE_INVAL)
9528         try:
9529           size = int(size)
9530         except (TypeError, ValueError), err:
9531           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9532                                      str(err), errors.ECODE_INVAL)
9533         disk_dict['size'] = size
9534       else:
9535         # modification of disk
9536         if 'size' in disk_dict:
9537           raise errors.OpPrereqError("Disk size change not possible, use"
9538                                      " grow-disk", errors.ECODE_INVAL)
9539
9540     if disk_addremove > 1:
9541       raise errors.OpPrereqError("Only one disk add or remove operation"
9542                                  " supported at a time", errors.ECODE_INVAL)
9543
9544     if self.op.disks and self.op.disk_template is not None:
9545       raise errors.OpPrereqError("Disk template conversion and other disk"
9546                                  " changes not supported at the same time",
9547                                  errors.ECODE_INVAL)
9548
9549     if (self.op.disk_template and
9550         self.op.disk_template in constants.DTS_INT_MIRROR and
9551         self.op.remote_node is None):
9552       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9553                                  " one requires specifying a secondary node",
9554                                  errors.ECODE_INVAL)
9555
9556     # NIC validation
9557     nic_addremove = 0
9558     for nic_op, nic_dict in self.op.nics:
9559       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9560       if nic_op == constants.DDM_REMOVE:
9561         nic_addremove += 1
9562         continue
9563       elif nic_op == constants.DDM_ADD:
9564         nic_addremove += 1
9565       else:
9566         if not isinstance(nic_op, int):
9567           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9568         if not isinstance(nic_dict, dict):
9569           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9570           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9571
9572       # nic_dict should be a dict
9573       nic_ip = nic_dict.get('ip', None)
9574       if nic_ip is not None:
9575         if nic_ip.lower() == constants.VALUE_NONE:
9576           nic_dict['ip'] = None
9577         else:
9578           if not netutils.IPAddress.IsValid(nic_ip):
9579             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9580                                        errors.ECODE_INVAL)
9581
9582       nic_bridge = nic_dict.get('bridge', None)
9583       nic_link = nic_dict.get('link', None)
9584       if nic_bridge and nic_link:
9585         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9586                                    " at the same time", errors.ECODE_INVAL)
9587       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9588         nic_dict['bridge'] = None
9589       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9590         nic_dict['link'] = None
9591
9592       if nic_op == constants.DDM_ADD:
9593         nic_mac = nic_dict.get('mac', None)
9594         if nic_mac is None:
9595           nic_dict['mac'] = constants.VALUE_AUTO
9596
9597       if 'mac' in nic_dict:
9598         nic_mac = nic_dict['mac']
9599         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9600           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9601
9602         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9603           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9604                                      " modifying an existing nic",
9605                                      errors.ECODE_INVAL)
9606
9607     if nic_addremove > 1:
9608       raise errors.OpPrereqError("Only one NIC add or remove operation"
9609                                  " supported at a time", errors.ECODE_INVAL)
9610
9611   def ExpandNames(self):
9612     self._ExpandAndLockInstance()
9613     self.needed_locks[locking.LEVEL_NODE] = []
9614     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9615
9616   def DeclareLocks(self, level):
9617     if level == locking.LEVEL_NODE:
9618       self._LockInstancesNodes()
9619       if self.op.disk_template and self.op.remote_node:
9620         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9621         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9622
9623   def BuildHooksEnv(self):
9624     """Build hooks env.
9625
9626     This runs on the master, primary and secondaries.
9627
9628     """
9629     args = dict()
9630     if constants.BE_MEMORY in self.be_new:
9631       args['memory'] = self.be_new[constants.BE_MEMORY]
9632     if constants.BE_VCPUS in self.be_new:
9633       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9634     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9635     # information at all.
9636     if self.op.nics:
9637       args['nics'] = []
9638       nic_override = dict(self.op.nics)
9639       for idx, nic in enumerate(self.instance.nics):
9640         if idx in nic_override:
9641           this_nic_override = nic_override[idx]
9642         else:
9643           this_nic_override = {}
9644         if 'ip' in this_nic_override:
9645           ip = this_nic_override['ip']
9646         else:
9647           ip = nic.ip
9648         if 'mac' in this_nic_override:
9649           mac = this_nic_override['mac']
9650         else:
9651           mac = nic.mac
9652         if idx in self.nic_pnew:
9653           nicparams = self.nic_pnew[idx]
9654         else:
9655           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9656         mode = nicparams[constants.NIC_MODE]
9657         link = nicparams[constants.NIC_LINK]
9658         args['nics'].append((ip, mac, mode, link))
9659       if constants.DDM_ADD in nic_override:
9660         ip = nic_override[constants.DDM_ADD].get('ip', None)
9661         mac = nic_override[constants.DDM_ADD]['mac']
9662         nicparams = self.nic_pnew[constants.DDM_ADD]
9663         mode = nicparams[constants.NIC_MODE]
9664         link = nicparams[constants.NIC_LINK]
9665         args['nics'].append((ip, mac, mode, link))
9666       elif constants.DDM_REMOVE in nic_override:
9667         del args['nics'][-1]
9668
9669     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9670     if self.op.disk_template:
9671       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9672
9673     return env
9674
9675   def BuildHooksNodes(self):
9676     """Build hooks nodes.
9677
9678     """
9679     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9680     return (nl, nl)
9681
9682   def CheckPrereq(self):
9683     """Check prerequisites.
9684
9685     This only checks the instance list against the existing names.
9686
9687     """
9688     # checking the new params on the primary/secondary nodes
9689
9690     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9691     cluster = self.cluster = self.cfg.GetClusterInfo()
9692     assert self.instance is not None, \
9693       "Cannot retrieve locked instance %s" % self.op.instance_name
9694     pnode = instance.primary_node
9695     nodelist = list(instance.all_nodes)
9696
9697     # OS change
9698     if self.op.os_name and not self.op.force:
9699       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9700                       self.op.force_variant)
9701       instance_os = self.op.os_name
9702     else:
9703       instance_os = instance.os
9704
9705     if self.op.disk_template:
9706       if instance.disk_template == self.op.disk_template:
9707         raise errors.OpPrereqError("Instance already has disk template %s" %
9708                                    instance.disk_template, errors.ECODE_INVAL)
9709
9710       if (instance.disk_template,
9711           self.op.disk_template) not in self._DISK_CONVERSIONS:
9712         raise errors.OpPrereqError("Unsupported disk template conversion from"
9713                                    " %s to %s" % (instance.disk_template,
9714                                                   self.op.disk_template),
9715                                    errors.ECODE_INVAL)
9716       _CheckInstanceDown(self, instance, "cannot change disk template")
9717       if self.op.disk_template in constants.DTS_INT_MIRROR:
9718         if self.op.remote_node == pnode:
9719           raise errors.OpPrereqError("Given new secondary node %s is the same"
9720                                      " as the primary node of the instance" %
9721                                      self.op.remote_node, errors.ECODE_STATE)
9722         _CheckNodeOnline(self, self.op.remote_node)
9723         _CheckNodeNotDrained(self, self.op.remote_node)
9724         # FIXME: here we assume that the old instance type is DT_PLAIN
9725         assert instance.disk_template == constants.DT_PLAIN
9726         disks = [{"size": d.size, "vg": d.logical_id[0]}
9727                  for d in instance.disks]
9728         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9729         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9730
9731     # hvparams processing
9732     if self.op.hvparams:
9733       hv_type = instance.hypervisor
9734       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9735       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9736       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9737
9738       # local check
9739       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9740       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9741       self.hv_new = hv_new # the new actual values
9742       self.hv_inst = i_hvdict # the new dict (without defaults)
9743     else:
9744       self.hv_new = self.hv_inst = {}
9745
9746     # beparams processing
9747     if self.op.beparams:
9748       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9749                                    use_none=True)
9750       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9751       be_new = cluster.SimpleFillBE(i_bedict)
9752       self.be_new = be_new # the new actual values
9753       self.be_inst = i_bedict # the new dict (without defaults)
9754     else:
9755       self.be_new = self.be_inst = {}
9756
9757     # osparams processing
9758     if self.op.osparams:
9759       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9760       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9761       self.os_inst = i_osdict # the new dict (without defaults)
9762     else:
9763       self.os_inst = {}
9764
9765     self.warn = []
9766
9767     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9768       mem_check_list = [pnode]
9769       if be_new[constants.BE_AUTO_BALANCE]:
9770         # either we changed auto_balance to yes or it was from before
9771         mem_check_list.extend(instance.secondary_nodes)
9772       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9773                                                   instance.hypervisor)
9774       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9775                                          instance.hypervisor)
9776       pninfo = nodeinfo[pnode]
9777       msg = pninfo.fail_msg
9778       if msg:
9779         # Assume the primary node is unreachable and go ahead
9780         self.warn.append("Can't get info from primary node %s: %s" %
9781                          (pnode,  msg))
9782       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9783         self.warn.append("Node data from primary node %s doesn't contain"
9784                          " free memory information" % pnode)
9785       elif instance_info.fail_msg:
9786         self.warn.append("Can't get instance runtime information: %s" %
9787                         instance_info.fail_msg)
9788       else:
9789         if instance_info.payload:
9790           current_mem = int(instance_info.payload['memory'])
9791         else:
9792           # Assume instance not running
9793           # (there is a slight race condition here, but it's not very probable,
9794           # and we have no other way to check)
9795           current_mem = 0
9796         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9797                     pninfo.payload['memory_free'])
9798         if miss_mem > 0:
9799           raise errors.OpPrereqError("This change will prevent the instance"
9800                                      " from starting, due to %d MB of memory"
9801                                      " missing on its primary node" % miss_mem,
9802                                      errors.ECODE_NORES)
9803
9804       if be_new[constants.BE_AUTO_BALANCE]:
9805         for node, nres in nodeinfo.items():
9806           if node not in instance.secondary_nodes:
9807             continue
9808           msg = nres.fail_msg
9809           if msg:
9810             self.warn.append("Can't get info from secondary node %s: %s" %
9811                              (node, msg))
9812           elif not isinstance(nres.payload.get('memory_free', None), int):
9813             self.warn.append("Secondary node %s didn't return free"
9814                              " memory information" % node)
9815           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9816             self.warn.append("Not enough memory to failover instance to"
9817                              " secondary node %s" % node)
9818
9819     # NIC processing
9820     self.nic_pnew = {}
9821     self.nic_pinst = {}
9822     for nic_op, nic_dict in self.op.nics:
9823       if nic_op == constants.DDM_REMOVE:
9824         if not instance.nics:
9825           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9826                                      errors.ECODE_INVAL)
9827         continue
9828       if nic_op != constants.DDM_ADD:
9829         # an existing nic
9830         if not instance.nics:
9831           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9832                                      " no NICs" % nic_op,
9833                                      errors.ECODE_INVAL)
9834         if nic_op < 0 or nic_op >= len(instance.nics):
9835           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9836                                      " are 0 to %d" %
9837                                      (nic_op, len(instance.nics) - 1),
9838                                      errors.ECODE_INVAL)
9839         old_nic_params = instance.nics[nic_op].nicparams
9840         old_nic_ip = instance.nics[nic_op].ip
9841       else:
9842         old_nic_params = {}
9843         old_nic_ip = None
9844
9845       update_params_dict = dict([(key, nic_dict[key])
9846                                  for key in constants.NICS_PARAMETERS
9847                                  if key in nic_dict])
9848
9849       if 'bridge' in nic_dict:
9850         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9851
9852       new_nic_params = _GetUpdatedParams(old_nic_params,
9853                                          update_params_dict)
9854       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9855       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9856       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9857       self.nic_pinst[nic_op] = new_nic_params
9858       self.nic_pnew[nic_op] = new_filled_nic_params
9859       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9860
9861       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9862         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9863         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9864         if msg:
9865           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9866           if self.op.force:
9867             self.warn.append(msg)
9868           else:
9869             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9870       if new_nic_mode == constants.NIC_MODE_ROUTED:
9871         if 'ip' in nic_dict:
9872           nic_ip = nic_dict['ip']
9873         else:
9874           nic_ip = old_nic_ip
9875         if nic_ip is None:
9876           raise errors.OpPrereqError('Cannot set the nic ip to None'
9877                                      ' on a routed nic', errors.ECODE_INVAL)
9878       if 'mac' in nic_dict:
9879         nic_mac = nic_dict['mac']
9880         if nic_mac is None:
9881           raise errors.OpPrereqError('Cannot set the nic mac to None',
9882                                      errors.ECODE_INVAL)
9883         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9884           # otherwise generate the mac
9885           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9886         else:
9887           # or validate/reserve the current one
9888           try:
9889             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9890           except errors.ReservationError:
9891             raise errors.OpPrereqError("MAC address %s already in use"
9892                                        " in cluster" % nic_mac,
9893                                        errors.ECODE_NOTUNIQUE)
9894
9895     # DISK processing
9896     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9897       raise errors.OpPrereqError("Disk operations not supported for"
9898                                  " diskless instances",
9899                                  errors.ECODE_INVAL)
9900     for disk_op, _ in self.op.disks:
9901       if disk_op == constants.DDM_REMOVE:
9902         if len(instance.disks) == 1:
9903           raise errors.OpPrereqError("Cannot remove the last disk of"
9904                                      " an instance", errors.ECODE_INVAL)
9905         _CheckInstanceDown(self, instance, "cannot remove disks")
9906
9907       if (disk_op == constants.DDM_ADD and
9908           len(instance.disks) >= constants.MAX_DISKS):
9909         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9910                                    " add more" % constants.MAX_DISKS,
9911                                    errors.ECODE_STATE)
9912       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9913         # an existing disk
9914         if disk_op < 0 or disk_op >= len(instance.disks):
9915           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9916                                      " are 0 to %d" %
9917                                      (disk_op, len(instance.disks)),
9918                                      errors.ECODE_INVAL)
9919
9920     return
9921
9922   def _ConvertPlainToDrbd(self, feedback_fn):
9923     """Converts an instance from plain to drbd.
9924
9925     """
9926     feedback_fn("Converting template to drbd")
9927     instance = self.instance
9928     pnode = instance.primary_node
9929     snode = self.op.remote_node
9930
9931     # create a fake disk info for _GenerateDiskTemplate
9932     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9933     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9934                                       instance.name, pnode, [snode],
9935                                       disk_info, None, None, 0, feedback_fn)
9936     info = _GetInstanceInfoText(instance)
9937     feedback_fn("Creating aditional volumes...")
9938     # first, create the missing data and meta devices
9939     for disk in new_disks:
9940       # unfortunately this is... not too nice
9941       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9942                             info, True)
9943       for child in disk.children:
9944         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9945     # at this stage, all new LVs have been created, we can rename the
9946     # old ones
9947     feedback_fn("Renaming original volumes...")
9948     rename_list = [(o, n.children[0].logical_id)
9949                    for (o, n) in zip(instance.disks, new_disks)]
9950     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9951     result.Raise("Failed to rename original LVs")
9952
9953     feedback_fn("Initializing DRBD devices...")
9954     # all child devices are in place, we can now create the DRBD devices
9955     for disk in new_disks:
9956       for node in [pnode, snode]:
9957         f_create = node == pnode
9958         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9959
9960     # at this point, the instance has been modified
9961     instance.disk_template = constants.DT_DRBD8
9962     instance.disks = new_disks
9963     self.cfg.Update(instance, feedback_fn)
9964
9965     # disks are created, waiting for sync
9966     disk_abort = not _WaitForSync(self, instance)
9967     if disk_abort:
9968       raise errors.OpExecError("There are some degraded disks for"
9969                                " this instance, please cleanup manually")
9970
9971   def _ConvertDrbdToPlain(self, feedback_fn):
9972     """Converts an instance from drbd to plain.
9973
9974     """
9975     instance = self.instance
9976     assert len(instance.secondary_nodes) == 1
9977     pnode = instance.primary_node
9978     snode = instance.secondary_nodes[0]
9979     feedback_fn("Converting template to plain")
9980
9981     old_disks = instance.disks
9982     new_disks = [d.children[0] for d in old_disks]
9983
9984     # copy over size and mode
9985     for parent, child in zip(old_disks, new_disks):
9986       child.size = parent.size
9987       child.mode = parent.mode
9988
9989     # update instance structure
9990     instance.disks = new_disks
9991     instance.disk_template = constants.DT_PLAIN
9992     self.cfg.Update(instance, feedback_fn)
9993
9994     feedback_fn("Removing volumes on the secondary node...")
9995     for disk in old_disks:
9996       self.cfg.SetDiskID(disk, snode)
9997       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9998       if msg:
9999         self.LogWarning("Could not remove block device %s on node %s,"
10000                         " continuing anyway: %s", disk.iv_name, snode, msg)
10001
10002     feedback_fn("Removing unneeded volumes on the primary node...")
10003     for idx, disk in enumerate(old_disks):
10004       meta = disk.children[1]
10005       self.cfg.SetDiskID(meta, pnode)
10006       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10007       if msg:
10008         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10009                         " continuing anyway: %s", idx, pnode, msg)
10010
10011   def Exec(self, feedback_fn):
10012     """Modifies an instance.
10013
10014     All parameters take effect only at the next restart of the instance.
10015
10016     """
10017     # Process here the warnings from CheckPrereq, as we don't have a
10018     # feedback_fn there.
10019     for warn in self.warn:
10020       feedback_fn("WARNING: %s" % warn)
10021
10022     result = []
10023     instance = self.instance
10024     # disk changes
10025     for disk_op, disk_dict in self.op.disks:
10026       if disk_op == constants.DDM_REMOVE:
10027         # remove the last disk
10028         device = instance.disks.pop()
10029         device_idx = len(instance.disks)
10030         for node, disk in device.ComputeNodeTree(instance.primary_node):
10031           self.cfg.SetDiskID(disk, node)
10032           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10033           if msg:
10034             self.LogWarning("Could not remove disk/%d on node %s: %s,"
10035                             " continuing anyway", device_idx, node, msg)
10036         result.append(("disk/%d" % device_idx, "remove"))
10037       elif disk_op == constants.DDM_ADD:
10038         # add a new disk
10039         if instance.disk_template in (constants.DT_FILE,
10040                                         constants.DT_SHARED_FILE):
10041           file_driver, file_path = instance.disks[0].logical_id
10042           file_path = os.path.dirname(file_path)
10043         else:
10044           file_driver = file_path = None
10045         disk_idx_base = len(instance.disks)
10046         new_disk = _GenerateDiskTemplate(self,
10047                                          instance.disk_template,
10048                                          instance.name, instance.primary_node,
10049                                          instance.secondary_nodes,
10050                                          [disk_dict],
10051                                          file_path,
10052                                          file_driver,
10053                                          disk_idx_base, feedback_fn)[0]
10054         instance.disks.append(new_disk)
10055         info = _GetInstanceInfoText(instance)
10056
10057         logging.info("Creating volume %s for instance %s",
10058                      new_disk.iv_name, instance.name)
10059         # Note: this needs to be kept in sync with _CreateDisks
10060         #HARDCODE
10061         for node in instance.all_nodes:
10062           f_create = node == instance.primary_node
10063           try:
10064             _CreateBlockDev(self, node, instance, new_disk,
10065                             f_create, info, f_create)
10066           except errors.OpExecError, err:
10067             self.LogWarning("Failed to create volume %s (%s) on"
10068                             " node %s: %s",
10069                             new_disk.iv_name, new_disk, node, err)
10070         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10071                        (new_disk.size, new_disk.mode)))
10072       else:
10073         # change a given disk
10074         instance.disks[disk_op].mode = disk_dict['mode']
10075         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
10076
10077     if self.op.disk_template:
10078       r_shut = _ShutdownInstanceDisks(self, instance)
10079       if not r_shut:
10080         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10081                                  " proceed with disk template conversion")
10082       mode = (instance.disk_template, self.op.disk_template)
10083       try:
10084         self._DISK_CONVERSIONS[mode](self, feedback_fn)
10085       except:
10086         self.cfg.ReleaseDRBDMinors(instance.name)
10087         raise
10088       result.append(("disk_template", self.op.disk_template))
10089
10090     # NIC changes
10091     for nic_op, nic_dict in self.op.nics:
10092       if nic_op == constants.DDM_REMOVE:
10093         # remove the last nic
10094         del instance.nics[-1]
10095         result.append(("nic.%d" % len(instance.nics), "remove"))
10096       elif nic_op == constants.DDM_ADD:
10097         # mac and bridge should be set, by now
10098         mac = nic_dict['mac']
10099         ip = nic_dict.get('ip', None)
10100         nicparams = self.nic_pinst[constants.DDM_ADD]
10101         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10102         instance.nics.append(new_nic)
10103         result.append(("nic.%d" % (len(instance.nics) - 1),
10104                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
10105                        (new_nic.mac, new_nic.ip,
10106                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10107                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10108                        )))
10109       else:
10110         for key in 'mac', 'ip':
10111           if key in nic_dict:
10112             setattr(instance.nics[nic_op], key, nic_dict[key])
10113         if nic_op in self.nic_pinst:
10114           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10115         for key, val in nic_dict.iteritems():
10116           result.append(("nic.%s/%d" % (key, nic_op), val))
10117
10118     # hvparams changes
10119     if self.op.hvparams:
10120       instance.hvparams = self.hv_inst
10121       for key, val in self.op.hvparams.iteritems():
10122         result.append(("hv/%s" % key, val))
10123
10124     # beparams changes
10125     if self.op.beparams:
10126       instance.beparams = self.be_inst
10127       for key, val in self.op.beparams.iteritems():
10128         result.append(("be/%s" % key, val))
10129
10130     # OS change
10131     if self.op.os_name:
10132       instance.os = self.op.os_name
10133
10134     # osparams changes
10135     if self.op.osparams:
10136       instance.osparams = self.os_inst
10137       for key, val in self.op.osparams.iteritems():
10138         result.append(("os/%s" % key, val))
10139
10140     self.cfg.Update(instance, feedback_fn)
10141
10142     return result
10143
  # Dispatch table for disk template conversions, keyed by the pair
  # (current disk template, requested disk template). Exec looks up the pair
  # and invokes the handler as handler(self, feedback_fn).
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
10148
10149
class LUBackupQuery(NoHooksLU):
  """Logical unit listing the exports found on a set of nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the node-level locks needed for the query.

    The node level is flagged in C{share_locks}; either the nodes named in
    the opcode or, when none are given, all nodes are requested.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Collect the export list of every locked node.

    @rtype: dict
    @return: a dictionary mapping each node to the payload of its export-list
        RPC (the instances exported on that node), or to C{False} when the
        RPC to that node failed

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    answers = self.rpc.call_export_list(self.nodes)

    exports = {}
    for node in answers:
      node_answer = answers[node]
      if node_answer.fail_msg:
        # failed nodes are reported as False instead of aborting the query
        exports[node] = False
      else:
        exports[node] = node_answer.payload

    return exports
10184
10185
class LUBackupPrepare(NoHooksLU):
  """Logical unit preparing an instance for export.

  For remote exports it returns the data the caller needs to drive the
  export; for any other mode it returns nothing.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the locked instance, checks that its primary node is online and
    fetches the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @rtype: dict or None
    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} (a (name, HMAC, salt) tuple) and C{x509_ca};
        C{None} for every other mode

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    result = self.rpc.call_x509_cert_create(pnode,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" % result.node)

    (name, cert_pem) = result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # The key name is returned together with its HMAC and the salt used
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
10235
10236
10237 class LUBackupExport(LogicalUnit):
10238   """Export an instance to an image in the cluster.
10239
10240   """
10241   HPATH = "instance-export"
10242   HTYPE = constants.HTYPE_INSTANCE
10243   REQ_BGL = False
10244
10245   def CheckArguments(self):
10246     """Check the arguments.
10247
10248     """
10249     self.x509_key_name = self.op.x509_key_name
10250     self.dest_x509_ca_pem = self.op.destination_x509_ca
10251
10252     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10253       if not self.x509_key_name:
10254         raise errors.OpPrereqError("Missing X509 key name for encryption",
10255                                    errors.ECODE_INVAL)
10256
10257       if not self.dest_x509_ca_pem:
10258         raise errors.OpPrereqError("Missing destination X509 CA",
10259                                    errors.ECODE_INVAL)
10260
10261   def ExpandNames(self):
10262     self._ExpandAndLockInstance()
10263
10264     # Lock all nodes for local exports
10265     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10266       # FIXME: lock only instance primary and destination node
10267       #
10268       # Sad but true, for now we have do lock all nodes, as we don't know where
10269       # the previous export might be, and in this LU we search for it and
10270       # remove it from its current node. In the future we could fix this by:
10271       #  - making a tasklet to search (share-lock all), then create the
10272       #    new one, then one to remove, after
10273       #  - removing the removal operation altogether
10274       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10275
10276   def DeclareLocks(self, level):
10277     """Last minute lock declaration."""
10278     # All nodes are locked anyway, so nothing to do here.
10279
10280   def BuildHooksEnv(self):
10281     """Build hooks env.
10282
10283     This will run on the master, primary node and target node.
10284
10285     """
10286     env = {
10287       "EXPORT_MODE": self.op.mode,
10288       "EXPORT_NODE": self.op.target_node,
10289       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10290       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10291       # TODO: Generic function for boolean env variables
10292       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10293       }
10294
10295     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10296
10297     return env
10298
10299   def BuildHooksNodes(self):
10300     """Build hooks nodes.
10301
10302     """
10303     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10304
10305     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10306       nl.append(self.op.target_node)
10307
10308     return (nl, nl)
10309
10310   def CheckPrereq(self):
10311     """Check prerequisites.
10312
10313     This checks that the instance and node names are valid.
10314
10315     """
10316     instance_name = self.op.instance_name
10317
10318     self.instance = self.cfg.GetInstanceInfo(instance_name)
10319     assert self.instance is not None, \
10320           "Cannot retrieve locked instance %s" % self.op.instance_name
10321     _CheckNodeOnline(self, self.instance.primary_node)
10322
10323     if (self.op.remove_instance and self.instance.admin_up and
10324         not self.op.shutdown):
10325       raise errors.OpPrereqError("Can not remove instance without shutting it"
10326                                  " down before")
10327
10328     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10329       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10330       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10331       assert self.dst_node is not None
10332
10333       _CheckNodeOnline(self, self.dst_node.name)
10334       _CheckNodeNotDrained(self, self.dst_node.name)
10335
10336       self._cds = None
10337       self.dest_disk_info = None
10338       self.dest_x509_ca = None
10339
10340     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10341       self.dst_node = None
10342
10343       if len(self.op.target_node) != len(self.instance.disks):
10344         raise errors.OpPrereqError(("Received destination information for %s"
10345                                     " disks, but instance %s has %s disks") %
10346                                    (len(self.op.target_node), instance_name,
10347                                     len(self.instance.disks)),
10348                                    errors.ECODE_INVAL)
10349
10350       cds = _GetClusterDomainSecret()
10351
10352       # Check X509 key name
10353       try:
10354         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10355       except (TypeError, ValueError), err:
10356         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10357
10358       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10359         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10360                                    errors.ECODE_INVAL)
10361
10362       # Load and verify CA
10363       try:
10364         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10365       except OpenSSL.crypto.Error, err:
10366         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10367                                    (err, ), errors.ECODE_INVAL)
10368
10369       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10370       if errcode is not None:
10371         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10372                                    (msg, ), errors.ECODE_INVAL)
10373
10374       self.dest_x509_ca = cert
10375
10376       # Verify target information
10377       disk_info = []
10378       for idx, disk_data in enumerate(self.op.target_node):
10379         try:
10380           (host, port, magic) = \
10381             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10382         except errors.GenericError, err:
10383           raise errors.OpPrereqError("Target info for disk %s: %s" %
10384                                      (idx, err), errors.ECODE_INVAL)
10385
10386         disk_info.append((host, port, magic))
10387
10388       assert len(disk_info) == len(self.op.target_node)
10389       self.dest_disk_info = disk_info
10390
10391     else:
10392       raise errors.ProgrammerError("Unhandled export mode %r" %
10393                                    self.op.mode)
10394
10395     # instance disk type verification
10396     # TODO: Implement export support for file-based disks
10397     for disk in self.instance.disks:
10398       if disk.dev_type == constants.LD_FILE:
10399         raise errors.OpPrereqError("Export not supported for instances with"
10400                                    " file-based disks", errors.ECODE_INVAL)
10401
10402   def _CleanupExports(self, feedback_fn):
10403     """Removes exports of current instance from all other nodes.
10404
10405     If an instance in a cluster with nodes A..D was exported to node C, its
10406     exports will be removed from the nodes A, B and D.
10407
10408     """
10409     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10410
10411     nodelist = self.cfg.GetNodeList()
10412     nodelist.remove(self.dst_node.name)
10413
10414     # on one-node clusters nodelist will be empty after the removal
10415     # if we proceed the backup would be removed because OpBackupQuery
10416     # substitutes an empty list with the full cluster node list.
10417     iname = self.instance.name
10418     if nodelist:
10419       feedback_fn("Removing old exports for instance %s" % iname)
10420       exportlist = self.rpc.call_export_list(nodelist)
10421       for node in exportlist:
10422         if exportlist[node].fail_msg:
10423           continue
10424         if iname in exportlist[node].payload:
10425           msg = self.rpc.call_export_remove(node, iname).fail_msg
10426           if msg:
10427             self.LogWarning("Could not remove older export for instance %s"
10428                             " on node %s: %s", iname, node, msg)
10429
10430   def Exec(self, feedback_fn):
10431     """Export an instance to an image in the cluster.
10432
10433     """
10434     assert self.op.mode in constants.EXPORT_MODES
10435
10436     instance = self.instance
10437     src_node = instance.primary_node
10438
10439     if self.op.shutdown:
10440       # shutdown the instance, but not the disks
10441       feedback_fn("Shutting down instance %s" % instance.name)
10442       result = self.rpc.call_instance_shutdown(src_node, instance,
10443                                                self.op.shutdown_timeout)
10444       # TODO: Maybe ignore failures if ignore_remove_failures is set
10445       result.Raise("Could not shutdown instance %s on"
10446                    " node %s" % (instance.name, src_node))
10447
10448     # set the disks ID correctly since call_instance_start needs the
10449     # correct drbd minor to create the symlinks
10450     for disk in instance.disks:
10451       self.cfg.SetDiskID(disk, src_node)
10452
10453     activate_disks = (not instance.admin_up)
10454
10455     if activate_disks:
10456       # Activate the instance disks if we'exporting a stopped instance
10457       feedback_fn("Activating disks for %s" % instance.name)
10458       _StartInstanceDisks(self, instance, None)
10459
10460     try:
10461       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10462                                                      instance)
10463
10464       helper.CreateSnapshots()
10465       try:
10466         if (self.op.shutdown and instance.admin_up and
10467             not self.op.remove_instance):
10468           assert not activate_disks
10469           feedback_fn("Starting instance %s" % instance.name)
10470           result = self.rpc.call_instance_start(src_node, instance, None, None)
10471           msg = result.fail_msg
10472           if msg:
10473             feedback_fn("Failed to start instance: %s" % msg)
10474             _ShutdownInstanceDisks(self, instance)
10475             raise errors.OpExecError("Could not start instance: %s" % msg)
10476
10477         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10478           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10479         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10480           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10481           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10482
10483           (key_name, _, _) = self.x509_key_name
10484
10485           dest_ca_pem = \
10486             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10487                                             self.dest_x509_ca)
10488
10489           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10490                                                      key_name, dest_ca_pem,
10491                                                      timeouts)
10492       finally:
10493         helper.Cleanup()
10494
10495       # Check for backwards compatibility
10496       assert len(dresults) == len(instance.disks)
10497       assert compat.all(isinstance(i, bool) for i in dresults), \
10498              "Not all results are boolean: %r" % dresults
10499
10500     finally:
10501       if activate_disks:
10502         feedback_fn("Deactivating disks for %s" % instance.name)
10503         _ShutdownInstanceDisks(self, instance)
10504
10505     if not (compat.all(dresults) and fin_resu):
10506       failures = []
10507       if not fin_resu:
10508         failures.append("export finalization")
10509       if not compat.all(dresults):
10510         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10511                                if not dsk)
10512         failures.append("disk export: disk(s) %s" % fdsk)
10513
10514       raise errors.OpExecError("Export failed, errors in %s" %
10515                                utils.CommaJoin(failures))
10516
10517     # At this point, the export was successful, we can cleanup/finish
10518
10519     # Remove instance if requested
10520     if self.op.remove_instance:
10521       feedback_fn("Removing instance %s" % instance.name)
10522       _RemoveInstance(self, feedback_fn, instance,
10523                       self.op.ignore_remove_failures)
10524
10525     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10526       self._CleanupExports(feedback_fn)
10527
10528     return fin_resu, dresults
10529
10530
class LUBackupRemove(NoHooksLU):
  """Logical unit removing all exports of a named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove the instance's export from every node that holds one.

    Nodes that cannot be queried are skipped with a warning; failed removals
    are logged as errors and do not abort the operation.

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    name_unresolved = not expanded_name
    if name_unresolved:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    exports = self.rpc.call_export_list(self.acquired_locks[locking.LEVEL_NODE])

    found = False
    for node in exports:
      node_exports = exports[node]

      query_err = node_exports.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue

      if instance_name not in node_exports.payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if name_unresolved and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
10576
10577
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generate the UUID of the new group and register its lock.

    """
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    ec_id = self.proc.GetECId()
    self.group_uuid = self.cfg.GenerateUniqueID(ec_id)
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    Makes sure no node group with the requested name exists yet and, if node
    parameters were given, enforces their types.

    """
    wanted_name = self.op.group_name

    try:
      clashing_uuid = self.cfg.LookupNodeGroup(wanted_name)
    except errors.OpPrereqError:
      # the lookup failing is the expected case: the name is still free
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (wanted_name, clashing_uuid),
                                 errors.ECODE_EXISTS)

    ndparams = self.op.ndparams
    if ndparams:
      utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Only the master node is used, for both hook phases.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    new_group = objects.NodeGroup(uuid=self.group_uuid,
                                  name=self.op.group_name,
                                  members=[],
                                  ndparams=self.op.ndparams,
                                  alloc_policy=self.op.alloc_policy)

    # The UUID was generated in ExpandNames, hence no existence check
    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10640
10641
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group and node names and compute the needed locks.

    Locks are requested on the given nodes, on the destination group and on
    every group one of the nodes currently belongs to.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the destination group can be retrieved and that the
    assignment does not split any instance across groups, unless the
    operation is forced.

    @raise errors.OpExecError: if the group cannot be retrieved, or if
        instances would be newly split and C{force} was not given

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        # Already-split instances are only reported together with new splits
        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    An instance counts as split when its primary and secondary nodes do not
    all belong to the same group. Only instances whose disk template is listed
    in constants.DTS_INT_MIRROR are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Assignments that leave a node in its current group are not changes
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # Split before the change: more than one group among the current ones
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split after the change: same check with the new assignments applied
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
10754
10755
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolve the requested groups into a list of UUIDs.

    Without requested names all groups are selected, ordered by name;
    otherwise each requested entry may be either a group UUID or a group
    name.

    @raise errors.OpPrereqError: if a requested entry matches no group

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    uuid_by_name = dict((group.name, group.uuid)
                        for group in self._all_groups.values())

    if self.names:
      # Accept names to be either names or UUIDs.
      known_uuids = frozenset(self._all_groups.keys())
      unknown = []
      self.wanted = resolved = []

      for entry in self.names:
        if entry in known_uuids:
          resolved.append(entry)
        elif entry in uuid_by_name:
          resolved.append(uuid_by_name[entry])
        else:
          unknown.append(entry)

      if unknown:
        raise errors.OpPrereqError("Some groups do not exist: %s" % unknown,
                                   errors.ECODE_NOENT)
    else:
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]

  def DeclareLocks(self, lu, level):
    """No additional locks are declared.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    The group->nodes and group->instances mappings are only built when the
    corresponding data was requested; otherwise C{None} is passed on.

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if want_nodes or want_instances:
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node_info in lu.cfg.GetAllNodesInfo().values():
        group_uuid = node_info.group
        if group_uuid in nodes_by_group:
          nodes_by_group[group_uuid].append(node_info.name)
          group_of_node[node_info.name] = group_uuid

      if want_instances:
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        for inst in lu.cfg.GetAllInstancesInfo().values():
          pnode = inst.primary_node
          if pnode in group_of_node:
            instances_by_group[group_of_node[pnode]].append(inst.name)

        if not want_nodes:
          # Do not pass on node information if it was not requested.
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(wanted_groups, nodes_by_group,
                                instances_by_group)
10830
10831
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query object from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object expand the requested names.

    """
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.gq.OldStyleQuery(self)
10847
10848
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Check that at least one modification was requested.

    @raise errors.OpPrereqError: if neither node parameters nor an
        allocation policy were passed

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolve the group name to its UUID and lock that group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the group and, if node parameters were given, computes and
    validates the resulting parameter dictionary.

    @raise errors.OpExecError: if the group cannot be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Enforce types on the merged dictionary, as that is what Exec stores
      # in the group. Checking the opcode's dictionary after the merge left
      # the stored values unvalidated.
      # NOTE(review): assumes _GetUpdatedParams returns a new dictionary with
      # the opcode's changes applied -- confirm against its definition
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Only the master node is used, for both hook phases.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @rtype: list
    @return: a list of (parameter, new value) pairs; only the node parameters
        are reported, a changed allocation policy is applied but not listed

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
10921
10922
10923
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing an empty node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name to its UUID and lock that group.

    """
    # The lookup raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]

  def CheckPrereq(self):
    """Check prerequisites.

    The group must be empty (i.e. contain no nodes) and must not be the only
    group left in the cluster.

    """
    # Verify that the group is empty.
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      member_names = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, member_names),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Only the master node is used, for both hook phases.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration no longer knows the group

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
10990
class LUGroupRename(LogicalUnit):
  """Logical unit giving a node group a new name.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the current group name and request the group lock.

    """
    # An unknown name makes LookupNodeGroup raise errors.OpPrereqError by
    # itself, hence no explicit check here
    uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = uuid
    self.needed_locks = {locking.LEVEL_NODEGROUP: [uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    The requested new name must not belong to an existing node group.

    """
    wanted = self.op.new_name

    try:
      clash_uuid = self.cfg.LookupNodeGroup(wanted)
    except errors.OpPrereqError:
      # The lookup failed, i.e. no group uses the new name yet
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (wanted, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Assemble the environment handed to the hooks.

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Select the nodes on which the hooks are run.

    These are the master node followed by all other nodes which are
    members of the renamed group; the very same list is returned twice.

    """
    master = self.cfg.GetMasterNode()

    # Drop the master from the candidates so that it is listed only once
    candidates = self.cfg.GetAllNodesInfo()
    candidates.pop(master, None)

    hook_nodes = [master]
    for node in candidates.values():
      if node.group == self.group_uuid:
        hook_nodes.append(node.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Store the new name in the group object and save it.

    @return: the new group name

    """
    uuid = self.group_uuid
    target = self.cfg.GetNodeGroup(uuid)

    if target is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, uuid))

    target.name = self.op.new_name
    self.cfg.Update(target, feedback_fn)

    return self.op.new_name
11058
11059
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Common base of the tag logical units.

  The class is abstract; all the other tags LUs derive from it.

  """

  def ExpandNames(self):
    """Expand the object name and declare the lock needed for it.

    Node and instance tags lock the respective object, any other tag kind
    does not request a lock here.

    """
    locks = self.needed_locks = {}
    kind = self.op.kind

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      locks[locking.LEVEL_NODE] = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the object carrying the tags and stores it in C{self.target};
    an unsupported tag kind is rejected.

    """
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
      return

    if kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
      return

    if kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
      return

    raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                               str(kind), errors.ECODE_INVAL)
11092
11093
class LUTagsGet(TagsLU):
  """Logical unit returning the tags of one object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks like the base class, but in shared mode.

    """
    TagsLU.ExpandNames(self)

    # Reading tags does not modify anything, so every level is shared
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a list.

    """
    tags = self.target.GetTags()
    return list(tags)
11111
11112
11113 class LUTagsSearch(NoHooksLU):
11114   """Searches the tags for a given pattern.
11115
11116   """
11117   REQ_BGL = False
11118
11119   def ExpandNames(self):
11120     self.needed_locks = {}
11121
11122   def CheckPrereq(self):
11123     """Check prerequisites.
11124
11125     This checks the pattern passed for validity by compiling it.
11126
11127     """
11128     try:
11129       self.re = re.compile(self.op.pattern)
11130     except re.error, err:
11131       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11132                                  (self.op.pattern, err), errors.ECODE_INVAL)
11133
11134   def Exec(self, feedback_fn):
11135     """Returns the tag list.
11136
11137     """
11138     cfg = self.cfg
11139     tgts = [("/cluster", cfg.GetClusterInfo())]
11140     ilist = cfg.GetAllInstancesInfo().values()
11141     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11142     nlist = cfg.GetAllNodesInfo().values()
11143     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11144     results = []
11145     for path, target in tgts:
11146       for tag in target.GetTags():
11147         if self.re.search(tag):
11148           results.append((path, tag))
11149     return results
11150
11151
11152 class LUTagsSet(TagsLU):
11153   """Sets a tag on a given object.
11154
11155   """
11156   REQ_BGL = False
11157
11158   def CheckPrereq(self):
11159     """Check prerequisites.
11160
11161     This checks the type and length of the tag name and value.
11162
11163     """
11164     TagsLU.CheckPrereq(self)
11165     for tag in self.op.tags:
11166       objects.TaggableObject.ValidateTag(tag)
11167
11168   def Exec(self, feedback_fn):
11169     """Sets the tag.
11170
11171     """
11172     try:
11173       for tag in self.op.tags:
11174         self.target.AddTag(tag)
11175     except errors.TagError, err:
11176       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11177     self.cfg.Update(self.target, feedback_fn)
11178
11179
class LUTagsDel(TagsLU):
  """Logical unit removing a list of tags from an object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every tag is validated and must currently be set on the target
    object.

    """
    TagsLU.CheckPrereq(self)

    wanted = self.op.tags
    for tag in wanted:
      objects.TaggableObject.ValidateTag(tag)

    # Tags requested for removal which the object does not carry
    missing = frozenset(wanted) - self.target.GetTags()
    if not missing:
      return

    quoted = ("'%s'" % name for name in sorted(missing))
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove all the tags from the target object and save it.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
11212
11213
class LUTestDelay(NoHooksLU):
  """Logical unit sleeping for a given time.

  The sleep takes place on the master and/or on a list of nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node list, if any, and lock those nodes.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def _TestDelay(self):
    """Run a single delay on the master and/or the nodes.

    """
    opcode = self.op

    if opcode.on_master and not utils.TestDelay(opcode.duration):
      raise errors.OpExecError("Error during master delay test")

    if opcode.on_nodes:
      answers = self.rpc.call_test_delay(opcode.on_nodes, opcode.duration)
      for (node, answer) in answers.items():
        answer.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Run the delay once or as often as requested.

    A repeat count of zero means a single delay without any iteration
    message.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last))
      self._TestDelay()
11260
11261
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  The LU can notify a client through a temporary Unix socket at several
  points (name expansion, start of execution, log messages), waiting each
  time for the client to connect and confirm. It also verifies that
  CheckArguments and ExpandNames were called before the later phases.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  # Timeout used while waiting for the confirmation of a connected client
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    The socket is created in a fresh temporary directory and its path is
    passed to C{cb}. After the client has connected (or the wait failed)
    the listening socket is closed and the directory removed; the
    function then waits on the connection for the client's confirmation.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors
    @raise errcls: if the client doesn't connect or doesn't confirm
        without a socket error (which includes the timeouts)

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client; the socket is already listening at this
        # point, so the client may connect right away
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        # The listening socket is not needed anymore, whatever happened
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        # Returns once the client has sent a byte or closed the connection
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    The notification is emitted as a log entry of type
    C{constants.ELOG_JQUEUE_TEST}.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # Failures are reported with the exception class matching the phase
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    # The socket path is supplied later by _NotifyUsingSocket as the last
    # argument of the partial function
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Count the calls of this method and reset the ExpandNames counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Verify the call order, optionally notify the client and set locks.

    @raise errors.ProgrammerError: if CheckArguments was not called before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Run the requested notifications, log messages and failure.

    @param feedback_fn: function used to send the test log messages
    @return: True, unless a failure was requested
    @raise errors.ProgrammerError: if ExpandNames was not called before
    @raise errors.OpExecError: if the opcode asked for a failure

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Announce the number of messages before sending the first one
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
11396
11397
11398 class IAllocator(object):
11399   """IAllocator framework.
11400
11401   An IAllocator instance has three sets of attributes:
11402     - cfg that is needed to query the cluster
11403     - input data (all members of the _KEYS class attribute are required)
11404     - four buffer attributes (in|out_data|text), that represent the
11405       input (to the external script) in text and data structure format,
11406       and the output from it, again in two formats
11407     - the result variables from the script (success, info, nodes) for
11408       easy usage
11409
11410   """
11411   # pylint: disable-msg=R0902
11412   # lots of instance attributes
11413   _ALLO_KEYS = [
11414     "name", "mem_size", "disks", "disk_template",
11415     "os", "tags", "nics", "vcpus", "hypervisor",
11416     ]
11417   _RELO_KEYS = [
11418     "name", "relocate_from",
11419     ]
11420   _EVAC_KEYS = [
11421     "evac_nodes",
11422     ]
11423
11424   def __init__(self, cfg, rpc, mode, **kwargs):
11425     self.cfg = cfg
11426     self.rpc = rpc
11427     # init buffer variables
11428     self.in_text = self.out_text = self.in_data = self.out_data = None
11429     # init all input fields so that pylint is happy
11430     self.mode = mode
11431     self.mem_size = self.disks = self.disk_template = None
11432     self.os = self.tags = self.nics = self.vcpus = None
11433     self.hypervisor = None
11434     self.relocate_from = None
11435     self.name = None
11436     self.evac_nodes = None
11437     # computed fields
11438     self.required_nodes = None
11439     # init result fields
11440     self.success = self.info = self.result = None
11441     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11442       keyset = self._ALLO_KEYS
11443       fn = self._AddNewInstance
11444     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11445       keyset = self._RELO_KEYS
11446       fn = self._AddRelocateInstance
11447     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11448       keyset = self._EVAC_KEYS
11449       fn = self._AddEvacuateNodes
11450     else:
11451       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11452                                    " IAllocator" % self.mode)
11453     for key in kwargs:
11454       if key not in keyset:
11455         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11456                                      " IAllocator" % key)
11457       setattr(self, key, kwargs[key])
11458
11459     for key in keyset:
11460       if key not in kwargs:
11461         raise errors.ProgrammerError("Missing input parameter '%s' to"
11462                                      " IAllocator" % key)
11463     self._BuildInputData(fn)
11464
11465   def _ComputeClusterData(self):
11466     """Compute the generic allocator input data.
11467
11468     This is the data that is independent of the actual operation.
11469
11470     """
11471     cfg = self.cfg
11472     cluster_info = cfg.GetClusterInfo()
11473     # cluster data
11474     data = {
11475       "version": constants.IALLOCATOR_VERSION,
11476       "cluster_name": cfg.GetClusterName(),
11477       "cluster_tags": list(cluster_info.GetTags()),
11478       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11479       # we don't have job IDs
11480       }
11481     ninfo = cfg.GetAllNodesInfo()
11482     iinfo = cfg.GetAllInstancesInfo().values()
11483     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11484
11485     # node data
11486     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11487
11488     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11489       hypervisor_name = self.hypervisor
11490     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11491       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11492     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11493       hypervisor_name = cluster_info.enabled_hypervisors[0]
11494
11495     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11496                                         hypervisor_name)
11497     node_iinfo = \
11498       self.rpc.call_all_instances_info(node_list,
11499                                        cluster_info.enabled_hypervisors)
11500
11501     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11502
11503     config_ndata = self._ComputeBasicNodeData(ninfo)
11504     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11505                                                  i_list, config_ndata)
11506     assert len(data["nodes"]) == len(ninfo), \
11507         "Incomplete node data computed"
11508
11509     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11510
11511     self.in_data = data
11512
11513   @staticmethod
11514   def _ComputeNodeGroupData(cfg):
11515     """Compute node groups data.
11516
11517     """
11518     ng = {}
11519     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11520       ng[guuid] = {
11521         "name": gdata.name,
11522         "alloc_policy": gdata.alloc_policy,
11523         }
11524     return ng
11525
11526   @staticmethod
11527   def _ComputeBasicNodeData(node_cfg):
11528     """Compute global node data.
11529
11530     @rtype: dict
11531     @returns: a dict of name: (node dict, node config)
11532
11533     """
11534     node_results = {}
11535     for ninfo in node_cfg.values():
11536       # fill in static (config-based) values
11537       pnr = {
11538         "tags": list(ninfo.GetTags()),
11539         "primary_ip": ninfo.primary_ip,
11540         "secondary_ip": ninfo.secondary_ip,
11541         "offline": ninfo.offline,
11542         "drained": ninfo.drained,
11543         "master_candidate": ninfo.master_candidate,
11544         "group": ninfo.group,
11545         "master_capable": ninfo.master_capable,
11546         "vm_capable": ninfo.vm_capable,
11547         }
11548
11549       node_results[ninfo.name] = pnr
11550
11551     return node_results
11552
11553   @staticmethod
11554   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11555                               node_results):
11556     """Compute global node data.
11557
11558     @param node_results: the basic node structures as filled from the config
11559
11560     """
11561     # make a copy of the current dict
11562     node_results = dict(node_results)
11563     for nname, nresult in node_data.items():
11564       assert nname in node_results, "Missing basic data for node %s" % nname
11565       ninfo = node_cfg[nname]
11566
11567       if not (ninfo.offline or ninfo.drained):
11568         nresult.Raise("Can't get data for node %s" % nname)
11569         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11570                                 nname)
11571         remote_info = nresult.payload
11572
11573         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11574                      'vg_size', 'vg_free', 'cpu_total']:
11575           if attr not in remote_info:
11576             raise errors.OpExecError("Node '%s' didn't return attribute"
11577                                      " '%s'" % (nname, attr))
11578           if not isinstance(remote_info[attr], int):
11579             raise errors.OpExecError("Node '%s' returned invalid value"
11580                                      " for '%s': %s" %
11581                                      (nname, attr, remote_info[attr]))
11582         # compute memory used by primary instances
11583         i_p_mem = i_p_up_mem = 0
11584         for iinfo, beinfo in i_list:
11585           if iinfo.primary_node == nname:
11586             i_p_mem += beinfo[constants.BE_MEMORY]
11587             if iinfo.name not in node_iinfo[nname].payload:
11588               i_used_mem = 0
11589             else:
11590               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11591             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11592             remote_info['memory_free'] -= max(0, i_mem_diff)
11593
11594             if iinfo.admin_up:
11595               i_p_up_mem += beinfo[constants.BE_MEMORY]
11596
11597         # compute memory used by instances
11598         pnr_dyn = {
11599           "total_memory": remote_info['memory_total'],
11600           "reserved_memory": remote_info['memory_dom0'],
11601           "free_memory": remote_info['memory_free'],
11602           "total_disk": remote_info['vg_size'],
11603           "free_disk": remote_info['vg_free'],
11604           "total_cpus": remote_info['cpu_total'],
11605           "i_pri_memory": i_p_mem,
11606           "i_pri_up_memory": i_p_up_mem,
11607           }
11608         pnr_dyn.update(node_results[nname])
11609         node_results[nname] = pnr_dyn
11610
11611     return node_results
11612
11613   @staticmethod
11614   def _ComputeInstanceData(cluster_info, i_list):
11615     """Compute global instance data.
11616
11617     """
11618     instance_data = {}
11619     for iinfo, beinfo in i_list:
11620       nic_data = []
11621       for nic in iinfo.nics:
11622         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11623         nic_dict = {"mac": nic.mac,
11624                     "ip": nic.ip,
11625                     "mode": filled_params[constants.NIC_MODE],
11626                     "link": filled_params[constants.NIC_LINK],
11627                    }
11628         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11629           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11630         nic_data.append(nic_dict)
11631       pir = {
11632         "tags": list(iinfo.GetTags()),
11633         "admin_up": iinfo.admin_up,
11634         "vcpus": beinfo[constants.BE_VCPUS],
11635         "memory": beinfo[constants.BE_MEMORY],
11636         "os": iinfo.os,
11637         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11638         "nics": nic_data,
11639         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11640         "disk_template": iinfo.disk_template,
11641         "hypervisor": iinfo.hypervisor,
11642         }
11643       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11644                                                  pir["disks"])
11645       instance_data[iinfo.name] = pir
11646
11647     return instance_data
11648
11649   def _AddNewInstance(self):
11650     """Add new instance data to allocator structure.
11651
11652     This in combination with _AllocatorGetClusterData will create the
11653     correct structure needed as input for the allocator.
11654
11655     The checks for the completeness of the opcode must have already been
11656     done.
11657
11658     """
11659     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11660
11661     if self.disk_template in constants.DTS_INT_MIRROR:
11662       self.required_nodes = 2
11663     else:
11664       self.required_nodes = 1
11665     request = {
11666       "name": self.name,
11667       "disk_template": self.disk_template,
11668       "tags": self.tags,
11669       "os": self.os,
11670       "vcpus": self.vcpus,
11671       "memory": self.mem_size,
11672       "disks": self.disks,
11673       "disk_space_total": disk_space,
11674       "nics": self.nics,
11675       "required_nodes": self.required_nodes,
11676       }
11677     return request
11678
11679   def _AddRelocateInstance(self):
11680     """Add relocate instance data to allocator structure.
11681
11682     This in combination with _IAllocatorGetClusterData will create the
11683     correct structure needed as input for the allocator.
11684
11685     The checks for the completeness of the opcode must have already been
11686     done.
11687
11688     """
11689     instance = self.cfg.GetInstanceInfo(self.name)
11690     if instance is None:
11691       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11692                                    " IAllocator" % self.name)
11693
11694     if instance.disk_template not in constants.DTS_MIRRORED:
11695       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11696                                  errors.ECODE_INVAL)
11697
11698     if instance.disk_template in constants.DTS_INT_MIRROR and \
11699         len(instance.secondary_nodes) != 1:
11700       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11701                                  errors.ECODE_STATE)
11702
11703     self.required_nodes = 1
11704     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11705     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11706
11707     request = {
11708       "name": self.name,
11709       "disk_space_total": disk_space,
11710       "required_nodes": self.required_nodes,
11711       "relocate_from": self.relocate_from,
11712       }
11713     return request
11714
11715   def _AddEvacuateNodes(self):
11716     """Add evacuate nodes data to allocator structure.
11717
11718     """
11719     request = {
11720       "evac_nodes": self.evac_nodes
11721       }
11722     return request
11723
11724   def _BuildInputData(self, fn):
11725     """Build input data structures.
11726
11727     """
11728     self._ComputeClusterData()
11729
11730     request = fn()
11731     request["type"] = self.mode
11732     self.in_data["request"] = request
11733
11734     self.in_text = serializer.Dump(self.in_data)
11735
11736   def Run(self, name, validate=True, call_fn=None):
11737     """Run an instance allocator and return the results.
11738
11739     """
11740     if call_fn is None:
11741       call_fn = self.rpc.call_iallocator_runner
11742
11743     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11744     result.Raise("Failure while running the iallocator script")
11745
11746     self.out_text = result.payload
11747     if validate:
11748       self._ValidateResult()
11749
11750   def _ValidateResult(self):
11751     """Process the allocator results.
11752
11753     This will process and if successful save the result in
11754     self.out_data and the other parameters.
11755
11756     """
11757     try:
11758       rdict = serializer.Load(self.out_text)
11759     except Exception, err:
11760       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11761
11762     if not isinstance(rdict, dict):
11763       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11764
11765     # TODO: remove backwards compatiblity in later versions
11766     if "nodes" in rdict and "result" not in rdict:
11767       rdict["result"] = rdict["nodes"]
11768       del rdict["nodes"]
11769
11770     for key in "success", "info", "result":
11771       if key not in rdict:
11772         raise errors.OpExecError("Can't parse iallocator results:"
11773                                  " missing key '%s'" % key)
11774       setattr(self, key, rdict[key])
11775
11776     if not isinstance(rdict["result"], list):
11777       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11778                                " is not a list")
11779     self.out_data = rdict
11780
11781
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds an L{IAllocator} request for the mode given in the opcode
  and returns either the generated input text or, after running the named
  allocator, its output text.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a required opcode attribute is missing
        or malformed, if the instance to be allocated already exists, or if
        the mode or direction is not a known one

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # An allocation test is only meaningful for a name that does not expand
      # to an existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Each disk must be a dict with an integer "size" and a "mode" of
      # either 'r' or 'w'
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      # Fall back to the hypervisor type reported by the configuration
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # Remembered here, consumed by Exec when building the request
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # An allocator name is only needed when the allocator is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function, not used by this LU
    @return: the allocator input text if the direction is
        L{constants.IALLOCATOR_DIR_IN}, otherwise the output text obtained
        by running the allocator named in the opcode
    @raise errors.ProgrammerError: if the opcode mode is not one of the
        modes handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # The mode has to be interpolated into the message; passing it as a
      # second exception argument would leave a literal "%s" in the text
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # The allocator is run with validate=False and its raw output text is
      # handed back to the caller
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
11877
11878
#: Dispatch table from query resource type to its implementation
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# The table must cover exactly the resource types listed in QR_VIA_OP
assert frozenset(_QUERY_IMPL) == constants.QR_VIA_OP
11888
11889
def _GetQueryImplementation(name):
  """Looks up the implementation registered for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} stored under C{name}
  @raise errors.OpPrereqError: if C{name} has no entry in L{_QUERY_IMPL}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]