code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_UP = [constants.ADMINST_UP]
  72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
  74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
 100 class LogicalUnit(object):
 101   """Logical Unit base class.
 102
 103   Subclasses must follow these rules:
 104     - implement ExpandNames
 105     - implement CheckPrereq (except when tasklets are used)
 106     - implement Exec (except when tasklets are used)
 107     - implement BuildHooksEnv
 108     - implement BuildHooksNodes
 109     - redefine HPATH and HTYPE
 110     - optionally redefine their run requirements:
 111         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 112
 113   Note that all commands require root permissions.
 114
 115   @ivar dry_run_result: the value (if any) that will be returned to the caller
 116       in dry-run mode (signalled by opcode dry_run parameter)
 117
 118   """
 119   HPATH = None
 120   HTYPE = None
 121   REQ_BGL = True
 122
 123   def __init__(self, processor, op, context, rpc_runner):
 124     """Constructor for LogicalUnit.
 125
 126     This needs to be overridden in derived classes in order to check op
 127     validity.
 128
 129     """
 130     self.proc = processor
 131     self.op = op
 132     self.cfg = context.cfg
 133     self.glm = context.glm
 134     # readability alias
 135     self.owned_locks = context.glm.list_owned
 136     self.context = context
 137     self.rpc = rpc_runner
 138     # Dicts used to declare locking needs to mcpu
 139     self.needed_locks = None
 140     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 141     self.add_locks = {}
 142     self.remove_locks = {}
 143     # Used to force good behavior when calling helper functions
 144     self.recalculate_locks = {}
 145     # logging
 146     self.Log = processor.Log # pylint: disable=C0103
 147     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 148     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 149     self.LogStep = processor.LogStep # pylint: disable=C0103
 150     # support for dry-run
 151     self.dry_run_result = None
 152     # support for generic debug attribute
 153     if (not hasattr(self.op, "debug_level") or
 154         not isinstance(self.op.debug_level, int)):
 155       self.op.debug_level = 0
 156
 157     # Tasklets
 158     self.tasklets = None
 159
 160     # Validate opcode parameters and set defaults
 161     self.op.Validate(True)
 162
 163     self.CheckArguments()
 164
 165   def CheckArguments(self):
 166     """Check syntactic validity for the opcode arguments.
 167
 168     This method is for doing a simple syntactic check and ensure
 169     validity of opcode parameters, without any cluster-related
 170     checks. While the same can be accomplished in ExpandNames and/or
 171     CheckPrereq, doing these separate is better because:
 172
 173       - ExpandNames is left as as purely a lock-related function
 174       - CheckPrereq is run after we have acquired locks (and possible
 175         waited for them)
 176
 177     The function is allowed to change the self.op attribute so that
 178     later methods can no longer worry about missing parameters.
 179
 180     """
 181     pass
 182
 183   def ExpandNames(self):
 184     """Expand names for this LU.
 185
 186     This method is called before starting to execute the opcode, and it should
 187     update all the parameters of the opcode to their canonical form (e.g. a
 188     short node name must be fully expanded after this method has successfully
 189     completed). This way locking, hooks, logging, etc. can work correctly.
 190
 191     LUs which implement this method must also populate the self.needed_locks
 192     member, as a dict with lock levels as keys, and a list of needed lock names
 193     as values. Rules:
 194
 195       - use an empty dict if you don't need any lock
 196       - if you don't need any lock at a particular level omit that level
 197       - don't put anything for the BGL level
 198       - if you want all locks at a level use locking.ALL_SET as a value
 199
 200     If you need to share locks (rather than acquire them exclusively) at one
 201     level you can modify self.share_locks, setting a true value (usually 1) for
 202     that level. By default locks are not shared.
 203
 204     This function can also define a list of tasklets, which then will be
 205     executed in order instead of the usual LU-level CheckPrereq and Exec
 206     functions, if those are not defined by the LU.
 207
 208     Examples::
 209
 210       # Acquire all nodes and one instance
 211       self.needed_locks = {
 212         locking.LEVEL_NODE: locking.ALL_SET,
 213         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 214       }
 215       # Acquire just two nodes
 216       self.needed_locks = {
 217         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 218       }
 219       # Acquire no locks
 220       self.needed_locks = {} # No, you can't leave it to the default value None
 221
 222     """
 223     # The implementation of this method is mandatory only if the new LU is
 224     # concurrent, so that old LUs don't need to be changed all at the same
 225     # time.
 226     if self.REQ_BGL:
 227       self.needed_locks = {} # Exclusive LUs don't need locks.
 228     else:
 229       raise NotImplementedError
 230
 231   def DeclareLocks(self, level):
 232     """Declare LU locking needs for a level
 233
 234     While most LUs can just declare their locking needs at ExpandNames time,
 235     sometimes there's the need to calculate some locks after having acquired
 236     the ones before. This function is called just before acquiring locks at a
 237     particular level, but after acquiring the ones at lower levels, and permits
 238     such calculations. It can be used to modify self.needed_locks, and by
 239     default it does nothing.
 240
 241     This function is only called if you have something already set in
 242     self.needed_locks for the level.
 243
 244     @param level: Locking level which is going to be locked
 245     @type level: member of ganeti.locking.LEVELS
 246
 247     """
 248
 249   def CheckPrereq(self):
 250     """Check prerequisites for this LU.
 251
 252     This method should check that the prerequisites for the execution
 253     of this LU are fulfilled. It can do internode communication, but
 254     it should be idempotent - no cluster or system changes are
 255     allowed.
 256
 257     The method should raise errors.OpPrereqError in case something is
 258     not fulfilled. Its return value is ignored.
 259
 260     This method should also update all the parameters of the opcode to
 261     their canonical form if it hasn't been done by ExpandNames before.
 262
 263     """
 264     if self.tasklets is not None:
 265       for (idx, tl) in enumerate(self.tasklets):
 266         logging.debug("Checking prerequisites for tasklet %s/%s",
 267                       idx + 1, len(self.tasklets))
 268         tl.CheckPrereq()
 269     else:
 270       pass
 271
 272   def Exec(self, feedback_fn):
 273     """Execute the LU.
 274
 275     This method should implement the actual work. It should raise
 276     errors.OpExecError for failures that are somewhat dealt with in
 277     code, or expected.
 278
 279     """
 280     if self.tasklets is not None:
 281       for (idx, tl) in enumerate(self.tasklets):
 282         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 283         tl.Exec(feedback_fn)
 284     else:
 285       raise NotImplementedError
 286
 287   def BuildHooksEnv(self):
 288     """Build hooks environment for this LU.
 289
 290     @rtype: dict
 291     @return: Dictionary containing the environment that will be used for
 292       running the hooks for this LU. The keys of the dict must not be prefixed
 293       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 294       will extend the environment with additional variables. If no environment
 295       should be defined, an empty dictionary should be returned (not C{None}).
 296     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 297       will not be called.
 298
 299     """
 300     raise NotImplementedError
 301
 302   def BuildHooksNodes(self):
 303     """Build list of nodes to run LU's hooks.
 304
 305     @rtype: tuple; (list, list)
 306     @return: Tuple containing a list of node names on which the hook
 307       should run before the execution and a list of node names on which the
 308       hook should run after the execution. No nodes should be returned as an
 309       empty list (and not None).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 317     """Notify the LU about the results of its hooks.
 318
 319     This method is called every time a hooks phase is executed, and notifies
 320     the Logical Unit about the hooks' result. The LU can then use it to alter
 321     its result based on the hooks.  By default the method does nothing and the
 322     previous result is passed back unchanged but any LU can define it if it
 323     wants to use the local cluster hook-scripts somehow.
 324
 325     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 326         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 327     @param hook_results: the results of the multi-node hooks rpc call
 328     @param feedback_fn: function used send feedback back to the caller
 329     @param lu_result: the previous Exec result this LU had, or None
 330         in the PRE phase
 331     @return: the new Exec result, based on the previous result
 332         and hook results
 333
 334     """
 335     # API must be kept, thus we ignore the unused argument and could
 336     # be a function warnings
 337     # pylint: disable=W0613,R0201
 338     return lu_result
 339
 340   def _ExpandAndLockInstance(self):
 341     """Helper function to expand and lock an instance.
 342
 343     Many LUs that work on an instance take its name in self.op.instance_name
 344     and need to expand it and then declare the expanded name for locking. This
 345     function does it, and then updates self.op.instance_name to the expanded
 346     name. It also initializes needed_locks as a dict, if this hasn't been done
 347     before.
 348
 349     """
 350     if self.needed_locks is None:
 351       self.needed_locks = {}
 352     else:
 353       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 354         "_ExpandAndLockInstance called with instance-level locks set"
 355     self.op.instance_name = _ExpandInstanceName(self.cfg,
 356                                                 self.op.instance_name)
 357     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 358
 359   def _LockInstancesNodes(self, primary_only=False,
 360                           level=locking.LEVEL_NODE):
 361     """Helper function to declare instances' nodes for locking.
 362
 363     This function should be called after locking one or more instances to lock
 364     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 365     with all primary or secondary nodes for instances already locked and
 366     present in self.needed_locks[locking.LEVEL_INSTANCE].
 367
 368     It should be called from DeclareLocks, and for safety only works if
 369     self.recalculate_locks[locking.LEVEL_NODE] is set.
 370
 371     In the future it may grow parameters to just lock some instance's nodes, or
 372     to just lock primaries or secondary nodes, if needed.
 373
 374     If should be called in DeclareLocks in a way similar to::
 375
 376       if level == locking.LEVEL_NODE:
 377         self._LockInstancesNodes()
 378
 379     @type primary_only: boolean
 380     @param primary_only: only lock primary nodes of locked instances
 381     @param level: Which lock level to use for locking nodes
 382
 383     """
 384     assert level in self.recalculate_locks, \
 385       "_LockInstancesNodes helper function called with no nodes to recalculate"
 386
 387     # TODO: check if we're really been called with the instance locks held
 388
 389     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 390     # future we might want to have different behaviors depending on the value
 391     # of self.recalculate_locks[locking.LEVEL_NODE]
 392     wanted_nodes = []
 393     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 394     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 395       wanted_nodes.append(instance.primary_node)
 396       if not primary_only:
 397         wanted_nodes.extend(instance.secondary_nodes)
 398
 399     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 400       self.needed_locks[level] = wanted_nodes
 401     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 402       self.needed_locks[level].extend(wanted_nodes)
 403     else:
 404       raise errors.ProgrammerError("Unknown recalculation mode")
 405
 406     del self.recalculate_locks[level]
 407
 408
 409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 410   """Simple LU which runs no hooks.
 411
 412   This LU is intended as a parent for other LogicalUnits which will
 413   run no hooks, in order to reduce duplicate code.
 414
 415   """
 416   HPATH = None
 417   HTYPE = None
 418
 419   def BuildHooksEnv(self):
 420     """Empty BuildHooksEnv for NoHooksLu.
 421
 422     This just raises an error.
 423
 424     """
 425     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 426
 427   def BuildHooksNodes(self):
 428     """Empty BuildHooksNodes for NoHooksLU.
 429
 430     """
 431     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 432
 433
 434 class Tasklet:
 435   """Tasklet base class.
 436
 437   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 438   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 439   tasklets know nothing about locks.
 440
 441   Subclasses must follow these rules:
 442     - Implement CheckPrereq
 443     - Implement Exec
 444
 445   """
 446   def __init__(self, lu):
 447     self.lu = lu
 448
 449     # Shortcuts
 450     self.cfg = lu.cfg
 451     self.rpc = lu.rpc
 452
 453   def CheckPrereq(self):
 454     """Check prerequisites for this tasklets.
 455
 456     This method should check whether the prerequisites for the execution of
 457     this tasklet are fulfilled. It can do internode communication, but it
 458     should be idempotent - no cluster or system changes are allowed.
 459
 460     The method should raise errors.OpPrereqError in case something is not
 461     fulfilled. Its return value is ignored.
 462
 463     This method should also update all parameters to their canonical form if it
 464     hasn't been done before.
 465
 466     """
 467     pass
 468
 469   def Exec(self, feedback_fn):
 470     """Execute the tasklet.
 471
 472     This method should implement the actual work. It should raise
 473     errors.OpExecError for failures that are somewhat dealt with in code, or
 474     expected.
 475
 476     """
 477     raise NotImplementedError
 478
 479
 480 class _QueryBase:
 481   """Base for query utility classes.
 482
 483   """
 484   #: Attribute holding field definitions
 485   FIELDS = None
 486
 487   def __init__(self, qfilter, fields, use_locking):
 488     """Initializes this class.
 489
 490     """
 491     self.use_locking = use_locking
 492
 493     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 494                              namefield="name")
 495     self.requested_data = self.query.RequestedData()
 496     self.names = self.query.RequestedNames()
 497
 498     # Sort only if no names were requested
 499     self.sort_by_name = not self.names
 500
 501     self.do_locking = None
 502     self.wanted = None
 503
 504   def _GetNames(self, lu, all_names, lock_level):
 505     """Helper function to determine names asked for in the query.
 506
 507     """
 508     if self.do_locking:
 509       names = lu.owned_locks(lock_level)
 510     else:
 511       names = all_names
 512
 513     if self.wanted == locking.ALL_SET:
 514       assert not self.names
 515       # caller didn't specify names, so ordering is not important
 516       return utils.NiceSort(names)
 517
 518     # caller specified names and we must keep the same order
 519     assert self.names
 520     assert not self.do_locking or lu.glm.is_owned(lock_level)
 521
 522     missing = set(self.wanted).difference(names)
 523     if missing:
 524       raise errors.OpExecError("Some items were removed before retrieving"
 525                                " their data: %s" % missing)
 526
 527     # Return expanded names
 528     return self.wanted
 529
 530   def ExpandNames(self, lu):
 531     """Expand names for this query.
 532
 533     See L{LogicalUnit.ExpandNames}.
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def DeclareLocks(self, lu, level):
 539     """Declare locks for this query.
 540
 541     See L{LogicalUnit.DeclareLocks}.
 542
 543     """
 544     raise NotImplementedError()
 545
 546   def _GetQueryData(self, lu):
 547     """Collects all data for this query.
 548
 549     @return: Query data object
 550
 551     """
 552     raise NotImplementedError()
 553
 554   def NewStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 559                                   sort_by_name=self.sort_by_name)
 560
 561   def OldStyleQuery(self, lu):
 562     """Collect data and execute query.
 563
 564     """
 565     return self.query.OldStyleQuery(self._GetQueryData(lu),
 566                                     sort_by_name=self.sort_by_name)
 567
 568
 569 def _ShareAll():
 570   """Returns a dict declaring all lock levels shared.
 571
 572   """
 573   return dict.fromkeys(locking.LEVELS, 1)
 574
 575
 576 def _MakeLegacyNodeInfo(data):
 577   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 578
 579   Converts the data into a single dictionary. This is fine for most use cases,
 580   but some require information from more than one volume group or hypervisor.
 581
 582   """
 583   (bootid, (vg_info, ), (hv_info, )) = data
 584
 585   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 586     "bootid": bootid,
 587     })
 588
 589
 590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 591   """Checks if the owned node groups are still correct for an instance.
 592
 593   @type cfg: L{config.ConfigWriter}
 594   @param cfg: The cluster configuration
 595   @type instance_name: string
 596   @param instance_name: Instance name
 597   @type owned_groups: set or frozenset
 598   @param owned_groups: List of currently owned node groups
 599
 600   """
 601   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 602
 603   if not owned_groups.issuperset(inst_groups):
 604     raise errors.OpPrereqError("Instance %s's node groups changed since"
 605                                " locks were acquired, current groups are"
 606                                " are '%s', owning groups '%s'; retry the"
 607                                " operation" %
 608                                (instance_name,
 609                                 utils.CommaJoin(inst_groups),
 610                                 utils.CommaJoin(owned_groups)),
 611                                errors.ECODE_STATE)
 612
 613   return inst_groups
 614
 615
 616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 617   """Checks if the instances in a node group are still correct.
 618
 619   @type cfg: L{config.ConfigWriter}
 620   @param cfg: The cluster configuration
 621   @type group_uuid: string
 622   @param group_uuid: Node group UUID
 623   @type owned_instances: set or frozenset
 624   @param owned_instances: List of currently owned instances
 625
 626   """
 627   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 628   if owned_instances != wanted_instances:
 629     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 630                                " locks were acquired, wanted '%s', have '%s';"
 631                                " retry the operation" %
 632                                (group_uuid,
 633                                 utils.CommaJoin(wanted_instances),
 634                                 utils.CommaJoin(owned_instances)),
 635                                errors.ECODE_STATE)
 636
 637   return wanted_instances
 638
 639
 640 def _SupportsOob(cfg, node):
 641   """Tells if node supports OOB.
 642
 643   @type cfg: L{config.ConfigWriter}
 644   @param cfg: The cluster configuration
 645   @type node: L{objects.Node}
 646   @param node: The node
 647   @return: The OOB script if supported or an empty string otherwise
 648
 649   """
 650   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 651
 652
 653 def _GetWantedNodes(lu, nodes):
 654   """Returns list of checked and expanded node names.
 655
 656   @type lu: L{LogicalUnit}
 657   @param lu: the logical unit on whose behalf we execute
 658   @type nodes: list
 659   @param nodes: list of node names or None for all nodes
 660   @rtype: list
 661   @return: the list of nodes, sorted
 662   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 663
 664   """
 665   if nodes:
 666     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 667
 668   return utils.NiceSort(lu.cfg.GetNodeList())
 669
 670
 671 def _GetWantedInstances(lu, instances):
 672   """Returns list of checked and expanded instance names.
 673
 674   @type lu: L{LogicalUnit}
 675   @param lu: the logical unit on whose behalf we execute
 676   @type instances: list
 677   @param instances: list of instance names or None for all instances
 678   @rtype: list
 679   @return: the list of instances, sorted
 680   @raise errors.OpPrereqError: if the instances parameter is wrong type
 681   @raise errors.OpPrereqError: if any of the passed instances is not found
 682
 683   """
 684   if instances:
 685     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 686   else:
 687     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 688   return wanted
 689
 690
 691 def _GetUpdatedParams(old_params, update_dict,
 692                       use_default=True, use_none=False):
 693   """Return the new version of a parameter dictionary.
 694
 695   @type old_params: dict
 696   @param old_params: old parameters
 697   @type update_dict: dict
 698   @param update_dict: dict containing new parameter values, or
 699       constants.VALUE_DEFAULT to reset the parameter to its default
 700       value
 701   @param use_default: boolean
 702   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 703       values as 'to be deleted' values
 704   @param use_none: boolean
 705   @type use_none: whether to recognise C{None} values as 'to be
 706       deleted' values
 707   @rtype: dict
 708   @return: the new parameter dictionary
 709
 710   """
 711   params_copy = copy.deepcopy(old_params)
 712   for key, val in update_dict.iteritems():
 713     if ((use_default and val == constants.VALUE_DEFAULT) or
 714         (use_none and val is None)):
 715       try:
 716         del params_copy[key]
 717       except KeyError:
 718         pass
 719     else:
 720       params_copy[key] = val
 721   return params_copy
 722
 723
 724 def _UpdateAndVerifySubDict(base, updates, type_check):
 725   """Updates and verifies a dict with sub dicts of the same type.
 726
 727   @param base: The dict with the old data
 728   @param updates: The dict with the new data
 729   @param type_check: Dict suitable to ForceDictType to verify correct types
 730   @returns: A new dict with updated and verified values
 731
 732   """
 733   def fn(old, value):
 734     new = _GetUpdatedParams(old, value)
 735     utils.ForceDictType(new, type_check)
 736     return new
 737
 738   ret = copy.deepcopy(base)
 739   ret.update(dict((key, fn(base.get(key, {}), value))
 740                   for key, value in updates.items()))
 741   return ret
 742
 743
 744 def _MergeAndVerifyHvState(op_input, obj_input):
 745   """Combines the hv state from an opcode with the one of the object
 746
 747   @param op_input: The input dict from the opcode
 748   @param obj_input: The input dict from the objects
 749   @return: The verified and updated dict
 750
 751   """
 752   if op_input:
 753     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 754     if invalid_hvs:
 755       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 756                                  " %s" % utils.CommaJoin(invalid_hvs),
 757                                  errors.ECODE_INVAL)
 758     if obj_input is None:
 759       obj_input = {}
 760     type_check = constants.HVSTS_PARAMETER_TYPES
 761     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 762
 763   return None
 764
 765
 766 def _MergeAndVerifyDiskState(op_input, obj_input):
 767   """Combines the disk state from an opcode with the one of the object
 768
 769   @param op_input: The input dict from the opcode
 770   @param obj_input: The input dict from the objects
 771   @return: The verified and updated dict
 772   """
 773   if op_input:
 774     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 775     if invalid_dst:
 776       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 777                                  utils.CommaJoin(invalid_dst),
 778                                  errors.ECODE_INVAL)
 779     type_check = constants.DSS_PARAMETER_TYPES
 780     if obj_input is None:
 781       obj_input = {}
 782     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 783                                               type_check))
 784                 for key, value in op_input.items())
 785
 786   return None
 787
 788
 789 def _ReleaseLocks(lu, level, names=None, keep=None):
 790   """Releases locks owned by an LU.
 791
 792   @type lu: L{LogicalUnit}
 793   @param level: Lock level
 794   @type names: list or None
 795   @param names: Names of locks to release
 796   @type keep: list or None
 797   @param keep: Names of locks to retain
 798
 799   """
 800   assert not (keep is not None and names is not None), \
 801          "Only one of the 'names' and the 'keep' parameters can be given"
 802
 803   if names is not None:
 804     should_release = names.__contains__
 805   elif keep:
 806     should_release = lambda name: name not in keep
 807   else:
 808     should_release = None
 809
 810   owned = lu.owned_locks(level)
 811   if not owned:
 812     # Not owning any lock at this level, do nothing
 813     pass
 814
 815   elif should_release:
 816     retain = []
 817     release = []
 818
 819     # Determine which locks to release
 820     for name in owned:
 821       if should_release(name):
 822         release.append(name)
 823       else:
 824         retain.append(name)
 825
 826     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 827
 828     # Release just some locks
 829     lu.glm.release(level, names=release)
 830
 831     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 832   else:
 833     # Release everything
 834     lu.glm.release(level)
 835
 836     assert not lu.glm.is_owned(level), "No locks should be owned"
 837
 838
 839 def _MapInstanceDisksToNodes(instances):
 840   """Creates a map from (node, volume) to instance name.
 841
 842   @type instances: list of L{objects.Instance}
 843   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 844
 845   """
 846   return dict(((node, vol), inst.name)
 847               for inst in instances
 848               for (node, vols) in inst.MapLVsByNode().items()
 849               for vol in vols)
 850
 851
 852 def _RunPostHook(lu, node_name):
 853   """Runs the post-hook for an opcode on a single node.
 854
 855   """
 856   hm = lu.proc.BuildHooksManager(lu)
 857   try:
 858     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 859   except:
 860     # pylint: disable=W0702
 861     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 862
 863
 864 def _CheckOutputFields(static, dynamic, selected):
 865   """Checks whether all selected fields are valid.
 866
 867   @type static: L{utils.FieldSet}
 868   @param static: static fields set
 869   @type dynamic: L{utils.FieldSet}
 870   @param dynamic: dynamic fields set
 871
 872   """
 873   f = utils.FieldSet()
 874   f.Extend(static)
 875   f.Extend(dynamic)
 876
 877   delta = f.NonMatching(selected)
 878   if delta:
 879     raise errors.OpPrereqError("Unknown output fields selected: %s"
 880                                % ",".join(delta), errors.ECODE_INVAL)
 881
 882
 883 def _CheckGlobalHvParams(params):
 884   """Validates that given hypervisor params are not global ones.
 885
 886   This will ensure that instances don't get customised versions of
 887   global params.
 888
 889   """
 890   used_globals = constants.HVC_GLOBALS.intersection(params)
 891   if used_globals:
 892     msg = ("The following hypervisor parameters are global and cannot"
 893            " be customized at instance level, please modify them at"
 894            " cluster level: %s" % utils.CommaJoin(used_globals))
 895     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 896
 897
 898 def _CheckNodeOnline(lu, node, msg=None):
 899   """Ensure that a given node is online.
 900
 901   @param lu: the LU on behalf of which we make the check
 902   @param node: the node to check
 903   @param msg: if passed, should be a message to replace the default one
 904   @raise errors.OpPrereqError: if the node is offline
 905
 906   """
 907   if msg is None:
 908     msg = "Can't use offline node"
 909   if lu.cfg.GetNodeInfo(node).offline:
 910     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 911
 912
 913 def _CheckNodeNotDrained(lu, node):
 914   """Ensure that a given node is not drained.
 915
 916   @param lu: the LU on behalf of which we make the check
 917   @param node: the node to check
 918   @raise errors.OpPrereqError: if the node is drained
 919
 920   """
 921   if lu.cfg.GetNodeInfo(node).drained:
 922     raise errors.OpPrereqError("Can't use drained node %s" % node,
 923                                errors.ECODE_STATE)
 924
 925
 926 def _CheckNodeVmCapable(lu, node):
 927   """Ensure that a given node is vm capable.
 928
 929   @param lu: the LU on behalf of which we make the check
 930   @param node: the node to check
 931   @raise errors.OpPrereqError: if the node is not vm capable
 932
 933   """
 934   if not lu.cfg.GetNodeInfo(node).vm_capable:
 935     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 936                                errors.ECODE_STATE)
 937
 938
 939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 940   """Ensure that a node supports a given OS.
 941
 942   @param lu: the LU on behalf of which we make the check
 943   @param node: the node to check
 944   @param os_name: the OS to query about
 945   @param force_variant: whether to ignore variant errors
 946   @raise errors.OpPrereqError: if the node is not supporting the OS
 947
 948   """
 949   result = lu.rpc.call_os_get(node, os_name)
 950   result.Raise("OS '%s' not in supported OS list for node %s" %
 951                (os_name, node),
 952                prereq=True, ecode=errors.ECODE_INVAL)
 953   if not force_variant:
 954     _CheckOSVariant(result.payload, os_name)
 955
 956
 957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 958   """Ensure that a node has the given secondary ip.
 959
 960   @type lu: L{LogicalUnit}
 961   @param lu: the LU on behalf of which we make the check
 962   @type node: string
 963   @param node: the node to check
 964   @type secondary_ip: string
 965   @param secondary_ip: the ip to check
 966   @type prereq: boolean
 967   @param prereq: whether to throw a prerequisite or an execute error
 968   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 969   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 970
 971   """
 972   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 973   result.Raise("Failure checking secondary ip on node %s" % node,
 974                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 975   if not result.payload:
 976     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 977            " please fix and re-run this command" % secondary_ip)
 978     if prereq:
 979       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 980     else:
 981       raise errors.OpExecError(msg)
 982
 983
 984 def _GetClusterDomainSecret():
 985   """Reads the cluster domain secret.
 986
 987   """
 988   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 989                                strict=True)
 990
 991
 992 def _CheckInstanceState(lu, instance, req_states, msg=None):
 993   """Ensure that an instance is in one of the required states.
 994
 995   @param lu: the LU on behalf of which we make the check
 996   @param instance: the instance to check
 997   @param msg: if passed, should be a message to replace the default one
 998   @raise errors.OpPrereqError: if the instance is not in the required state
 999
1000   """
1001   if msg is None:
1002     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003   if instance.admin_state not in req_states:
1004     raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005                                (instance, instance.admin_state, msg),
1006                                errors.ECODE_STATE)
1007
1008   if constants.ADMINST_UP not in req_states:
1009     pnode = instance.primary_node
1010     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012                 prereq=True, ecode=errors.ECODE_ENVIRON)
1013
1014     if instance.name in ins_l.payload:
1015       raise errors.OpPrereqError("Instance %s is running, %s" %
1016                                  (instance.name, msg), errors.ECODE_STATE)
1017
1018
1019 def _CheckMinMaxSpecs(name, ipolicy, value):
1020   """Checks if value is in the desired range.
1021
1022   @param name: name of the parameter for which we perform the check
1023   @param ipolicy: dictionary containing min, max and std values
1024   @param value: actual value that we want to use
1025   @return: None or element not meeting the criteria
1026
1027
1028   """
1029   if value in [None, constants.VALUE_AUTO]:
1030     return None
1031   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1032   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1033   if value > max_v or min_v > value:
1034     return ("%s value %s is not in range [%s, %s]" %
1035             (name, value, min_v, max_v))
1036   return None
1037
1038
1039 def _ExpandItemName(fn, name, kind):
1040   """Expand an item name.
1041
1042   @param fn: the function to use for expansion
1043   @param name: requested item name
1044   @param kind: text description ('Node' or 'Instance')
1045   @return: the resolved (full) name
1046   @raise errors.OpPrereqError: if the item is not found
1047
1048   """
1049   full_name = fn(name)
1050   if full_name is None:
1051     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1052                                errors.ECODE_NOENT)
1053   return full_name
1054
1055
1056 def _ExpandNodeName(cfg, name):
1057   """Wrapper over L{_ExpandItemName} for nodes."""
1058   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1059
1060
1061 def _ExpandInstanceName(cfg, name):
1062   """Wrapper over L{_ExpandItemName} for instance."""
1063   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1064
1065
1066 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1067                           minmem, maxmem, vcpus, nics, disk_template, disks,
1068                           bep, hvp, hypervisor_name, tags):
1069   """Builds instance related env variables for hooks
1070
1071   This builds the hook environment from individual variables.
1072
1073   @type name: string
1074   @param name: the name of the instance
1075   @type primary_node: string
1076   @param primary_node: the name of the instance's primary node
1077   @type secondary_nodes: list
1078   @param secondary_nodes: list of secondary nodes as strings
1079   @type os_type: string
1080   @param os_type: the name of the instance's OS
1081   @type status: string
1082   @param status: the desired status of the instance
1083   @type minmem: string
1084   @param minmem: the minimum memory size of the instance
1085   @type maxmem: string
1086   @param maxmem: the maximum memory size of the instance
1087   @type vcpus: string
1088   @param vcpus: the count of VCPUs the instance has
1089   @type nics: list
1090   @param nics: list of tuples (ip, mac, mode, link) representing
1091       the NICs the instance has
1092   @type disk_template: string
1093   @param disk_template: the disk template of the instance
1094   @type disks: list
1095   @param disks: the list of (size, mode) pairs
1096   @type bep: dict
1097   @param bep: the backend parameters for the instance
1098   @type hvp: dict
1099   @param hvp: the hypervisor parameters for the instance
1100   @type hypervisor_name: string
1101   @param hypervisor_name: the hypervisor for the instance
1102   @type tags: list
1103   @param tags: list of instance tags as strings
1104   @rtype: dict
1105   @return: the hook environment for this instance
1106
1107   """
1108   env = {
1109     "OP_TARGET": name,
1110     "INSTANCE_NAME": name,
1111     "INSTANCE_PRIMARY": primary_node,
1112     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1113     "INSTANCE_OS_TYPE": os_type,
1114     "INSTANCE_STATUS": status,
1115     "INSTANCE_MINMEM": minmem,
1116     "INSTANCE_MAXMEM": maxmem,
1117     # TODO(2.7) remove deprecated "memory" value
1118     "INSTANCE_MEMORY": maxmem,
1119     "INSTANCE_VCPUS": vcpus,
1120     "INSTANCE_DISK_TEMPLATE": disk_template,
1121     "INSTANCE_HYPERVISOR": hypervisor_name,
1122   }
1123   if nics:
1124     nic_count = len(nics)
1125     for idx, (ip, mac, mode, link) in enumerate(nics):
1126       if ip is None:
1127         ip = ""
1128       env["INSTANCE_NIC%d_IP" % idx] = ip
1129       env["INSTANCE_NIC%d_MAC" % idx] = mac
1130       env["INSTANCE_NIC%d_MODE" % idx] = mode
1131       env["INSTANCE_NIC%d_LINK" % idx] = link
1132       if mode == constants.NIC_MODE_BRIDGED:
1133         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1134   else:
1135     nic_count = 0
1136
1137   env["INSTANCE_NIC_COUNT"] = nic_count
1138
1139   if disks:
1140     disk_count = len(disks)
1141     for idx, (size, mode) in enumerate(disks):
1142       env["INSTANCE_DISK%d_SIZE" % idx] = size
1143       env["INSTANCE_DISK%d_MODE" % idx] = mode
1144   else:
1145     disk_count = 0
1146
1147   env["INSTANCE_DISK_COUNT"] = disk_count
1148
1149   if not tags:
1150     tags = []
1151
1152   env["INSTANCE_TAGS"] = " ".join(tags)
1153
1154   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1155     for key, value in source.items():
1156       env["INSTANCE_%s_%s" % (kind, key)] = value
1157
1158   return env
1159
1160
1161 def _NICListToTuple(lu, nics):
1162   """Build a list of nic information tuples.
1163
1164   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1165   value in LUInstanceQueryData.
1166
1167   @type lu:  L{LogicalUnit}
1168   @param lu: the logical unit on whose behalf we execute
1169   @type nics: list of L{objects.NIC}
1170   @param nics: list of nics to convert to hooks tuples
1171
1172   """
1173   hooks_nics = []
1174   cluster = lu.cfg.GetClusterInfo()
1175   for nic in nics:
1176     ip = nic.ip
1177     mac = nic.mac
1178     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1179     mode = filled_params[constants.NIC_MODE]
1180     link = filled_params[constants.NIC_LINK]
1181     hooks_nics.append((ip, mac, mode, link))
1182   return hooks_nics
1183
1184
1185 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1186   """Builds instance related env variables for hooks from an object.
1187
1188   @type lu: L{LogicalUnit}
1189   @param lu: the logical unit on whose behalf we execute
1190   @type instance: L{objects.Instance}
1191   @param instance: the instance for which we should build the
1192       environment
1193   @type override: dict
1194   @param override: dictionary with key/values that will override
1195       our values
1196   @rtype: dict
1197   @return: the hook environment dictionary
1198
1199   """
1200   cluster = lu.cfg.GetClusterInfo()
1201   bep = cluster.FillBE(instance)
1202   hvp = cluster.FillHV(instance)
1203   args = {
1204     "name": instance.name,
1205     "primary_node": instance.primary_node,
1206     "secondary_nodes": instance.secondary_nodes,
1207     "os_type": instance.os,
1208     "status": instance.admin_state,
1209     "maxmem": bep[constants.BE_MAXMEM],
1210     "minmem": bep[constants.BE_MINMEM],
1211     "vcpus": bep[constants.BE_VCPUS],
1212     "nics": _NICListToTuple(lu, instance.nics),
1213     "disk_template": instance.disk_template,
1214     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1215     "bep": bep,
1216     "hvp": hvp,
1217     "hypervisor_name": instance.hypervisor,
1218     "tags": instance.tags,
1219   }
1220   if override:
1221     args.update(override)
1222   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1223
1224
1225 def _AdjustCandidatePool(lu, exceptions):
1226   """Adjust the candidate pool after node operations.
1227
1228   """
1229   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1230   if mod_list:
1231     lu.LogInfo("Promoted nodes to master candidate role: %s",
1232                utils.CommaJoin(node.name for node in mod_list))
1233     for name in mod_list:
1234       lu.context.ReaddNode(name)
1235   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1236   if mc_now > mc_max:
1237     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1238                (mc_now, mc_max))
1239
1240
1241 def _DecideSelfPromotion(lu, exceptions=None):
1242   """Decide whether I should promote myself as a master candidate.
1243
1244   """
1245   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1246   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1247   # the new node will increase mc_max with one, so:
1248   mc_should = min(mc_should + 1, cp_size)
1249   return mc_now < mc_should
1250
1251
1252 def _CalculateGroupIPolicy(cfg, group):
1253   """Calculate instance policy for group.
1254
1255   """
1256   cluster = cfg.GetClusterInfo()
1257   return cluster.SimpleFillIPolicy(group.ipolicy)
1258
1259
1260 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1261   """Check that the brigdes needed by a list of nics exist.
1262
1263   """
1264   cluster = lu.cfg.GetClusterInfo()
1265   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1266   brlist = [params[constants.NIC_LINK] for params in paramslist
1267             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1268   if brlist:
1269     result = lu.rpc.call_bridges_exist(target_node, brlist)
1270     result.Raise("Error checking bridges on destination node '%s'" %
1271                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1272
1273
1274 def _CheckInstanceBridgesExist(lu, instance, node=None):
1275   """Check that the brigdes needed by an instance exist.
1276
1277   """
1278   if node is None:
1279     node = instance.primary_node
1280   _CheckNicsBridgesExist(lu, instance.nics, node)
1281
1282
1283 def _CheckOSVariant(os_obj, name):
1284   """Check whether an OS name conforms to the os variants specification.
1285
1286   @type os_obj: L{objects.OS}
1287   @param os_obj: OS object to check
1288   @type name: string
1289   @param name: OS name passed by the user, to check for validity
1290
1291   """
1292   variant = objects.OS.GetVariant(name)
1293   if not os_obj.supported_variants:
1294     if variant:
1295       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1296                                  " passed)" % (os_obj.name, variant),
1297                                  errors.ECODE_INVAL)
1298     return
1299   if not variant:
1300     raise errors.OpPrereqError("OS name must include a variant",
1301                                errors.ECODE_INVAL)
1302
1303   if variant not in os_obj.supported_variants:
1304     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1305
1306
1307 def _GetNodeInstancesInner(cfg, fn):
1308   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1309
1310
1311 def _GetNodeInstances(cfg, node_name):
1312   """Returns a list of all primary and secondary instances on a node.
1313
1314   """
1315
1316   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1317
1318
1319 def _GetNodePrimaryInstances(cfg, node_name):
1320   """Returns primary instances on a node.
1321
1322   """
1323   return _GetNodeInstancesInner(cfg,
1324                                 lambda inst: node_name == inst.primary_node)
1325
1326
1327 def _GetNodeSecondaryInstances(cfg, node_name):
1328   """Returns secondary instances on a node.
1329
1330   """
1331   return _GetNodeInstancesInner(cfg,
1332                                 lambda inst: node_name in inst.secondary_nodes)
1333
1334
1335 def _GetStorageTypeArgs(cfg, storage_type):
1336   """Returns the arguments for a storage type.
1337
1338   """
1339   # Special case for file storage
1340   if storage_type == constants.ST_FILE:
1341     # storage.FileStorage wants a list of storage directories
1342     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1343
1344   return []
1345
1346
1347 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1348   faulty = []
1349
1350   for dev in instance.disks:
1351     cfg.SetDiskID(dev, node_name)
1352
1353   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1354   result.Raise("Failed to get disk status from node %s" % node_name,
1355                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1356
1357   for idx, bdev_status in enumerate(result.payload):
1358     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1359       faulty.append(idx)
1360
1361   return faulty
1362
1363
1364 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1365   """Check the sanity of iallocator and node arguments and use the
1366   cluster-wide iallocator if appropriate.
1367
1368   Check that at most one of (iallocator, node) is specified. If none is
1369   specified, then the LU's opcode's iallocator slot is filled with the
1370   cluster-wide default iallocator.
1371
1372   @type iallocator_slot: string
1373   @param iallocator_slot: the name of the opcode iallocator slot
1374   @type node_slot: string
1375   @param node_slot: the name of the opcode target node slot
1376
1377   """
1378   node = getattr(lu.op, node_slot, None)
1379   iallocator = getattr(lu.op, iallocator_slot, None)
1380
1381   if node is not None and iallocator is not None:
1382     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1383                                errors.ECODE_INVAL)
1384   elif node is None and iallocator is None:
1385     default_iallocator = lu.cfg.GetDefaultIAllocator()
1386     if default_iallocator:
1387       setattr(lu.op, iallocator_slot, default_iallocator)
1388     else:
1389       raise errors.OpPrereqError("No iallocator or node given and no"
1390                                  " cluster-wide default iallocator found;"
1391                                  " please specify either an iallocator or a"
1392                                  " node, or set a cluster-wide default"
1393                                  " iallocator")
1394
1395
1396 def _GetDefaultIAllocator(cfg, iallocator):
1397   """Decides on which iallocator to use.
1398
1399   @type cfg: L{config.ConfigWriter}
1400   @param cfg: Cluster configuration object
1401   @type iallocator: string or None
1402   @param iallocator: Iallocator specified in opcode
1403   @rtype: string
1404   @return: Iallocator name
1405
1406   """
1407   if not iallocator:
1408     # Use default iallocator
1409     iallocator = cfg.GetDefaultIAllocator()
1410
1411   if not iallocator:
1412     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1413                                " opcode nor as a cluster-wide default",
1414                                errors.ECODE_INVAL)
1415
1416   return iallocator
1417
1418
1419 class LUClusterPostInit(LogicalUnit):
1420   """Logical unit for running hooks after cluster initialization.
1421
1422   """
1423   HPATH = "cluster-init"
1424   HTYPE = constants.HTYPE_CLUSTER
1425
1426   def BuildHooksEnv(self):
1427     """Build hooks env.
1428
1429     """
1430     return {
1431       "OP_TARGET": self.cfg.GetClusterName(),
1432       }
1433
1434   def BuildHooksNodes(self):
1435     """Build hooks nodes.
1436
1437     """
1438     return ([], [self.cfg.GetMasterNode()])
1439
1440   def Exec(self, feedback_fn):
1441     """Nothing to do.
1442
1443     """
1444     return True
1445
1446
1447 class LUClusterDestroy(LogicalUnit):
1448   """Logical unit for destroying the cluster.
1449
1450   """
1451   HPATH = "cluster-destroy"
1452   HTYPE = constants.HTYPE_CLUSTER
1453
1454   def BuildHooksEnv(self):
1455     """Build hooks env.
1456
1457     """
1458     return {
1459       "OP_TARGET": self.cfg.GetClusterName(),
1460       }
1461
1462   def BuildHooksNodes(self):
1463     """Build hooks nodes.
1464
1465     """
1466     return ([], [])
1467
1468   def CheckPrereq(self):
1469     """Check prerequisites.
1470
1471     This checks whether the cluster is empty.
1472
1473     Any errors are signaled by raising errors.OpPrereqError.
1474
1475     """
1476     master = self.cfg.GetMasterNode()
1477
1478     nodelist = self.cfg.GetNodeList()
1479     if len(nodelist) != 1 or nodelist[0] != master:
1480       raise errors.OpPrereqError("There are still %d node(s) in"
1481                                  " this cluster." % (len(nodelist) - 1),
1482                                  errors.ECODE_INVAL)
1483     instancelist = self.cfg.GetInstanceList()
1484     if instancelist:
1485       raise errors.OpPrereqError("There are still %d instance(s) in"
1486                                  " this cluster." % len(instancelist),
1487                                  errors.ECODE_INVAL)
1488
1489   def Exec(self, feedback_fn):
1490     """Destroys the cluster.
1491
1492     """
1493     master_params = self.cfg.GetMasterNetworkParameters()
1494
1495     # Run post hooks on master node before it's removed
1496     _RunPostHook(self, master_params.name)
1497
1498     ems = self.cfg.GetUseExternalMipScript()
1499     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1500                                                      master_params, ems)
1501     result.Raise("Could not disable the master role")
1502
1503     return master_params.name
1504
1505
1506 def _VerifyCertificate(filename):
1507   """Verifies a certificate for L{LUClusterVerifyConfig}.
1508
1509   @type filename: string
1510   @param filename: Path to PEM file
1511
1512   """
1513   try:
1514     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1515                                            utils.ReadFile(filename))
1516   except Exception, err: # pylint: disable=W0703
1517     return (LUClusterVerifyConfig.ETYPE_ERROR,
1518             "Failed to load X509 certificate %s: %s" % (filename, err))
1519
1520   (errcode, msg) = \
1521     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1522                                 constants.SSL_CERT_EXPIRATION_ERROR)
1523
1524   if msg:
1525     fnamemsg = "While verifying %s: %s" % (filename, msg)
1526   else:
1527     fnamemsg = None
1528
1529   if errcode is None:
1530     return (None, fnamemsg)
1531   elif errcode == utils.CERT_WARNING:
1532     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1533   elif errcode == utils.CERT_ERROR:
1534     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1535
1536   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1537
1538
1539 def _GetAllHypervisorParameters(cluster, instances):
1540   """Compute the set of all hypervisor parameters.
1541
1542   @type cluster: L{objects.Cluster}
1543   @param cluster: the cluster object
1544   @param instances: list of L{objects.Instance}
1545   @param instances: additional instances from which to obtain parameters
1546   @rtype: list of (origin, hypervisor, parameters)
1547   @return: a list with all parameters found, indicating the hypervisor they
1548        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1549
1550   """
1551   hvp_data = []
1552
1553   for hv_name in cluster.enabled_hypervisors:
1554     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1555
1556   for os_name, os_hvp in cluster.os_hvp.items():
1557     for hv_name, hv_params in os_hvp.items():
1558       if hv_params:
1559         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1560         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1561
1562   # TODO: collapse identical parameter values in a single one
1563   for instance in instances:
1564     if instance.hvparams:
1565       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1566                        cluster.FillHV(instance)))
1567
1568   return hvp_data
1569
1570
1571 class _VerifyErrors(object):
1572   """Mix-in for cluster/group verify LUs.
1573
1574   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1575   self.op and self._feedback_fn to be available.)
1576
1577   """
1578
1579   ETYPE_FIELD = "code"
1580   ETYPE_ERROR = "ERROR"
1581   ETYPE_WARNING = "WARNING"
1582
1583   def _Error(self, ecode, item, msg, *args, **kwargs):
1584     """Format an error message.
1585
1586     Based on the opcode's error_codes parameter, either format a
1587     parseable error code, or a simpler error string.
1588
1589     This must be called only from Exec and functions called from Exec.
1590
1591     """
1592     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1593     itype, etxt, _ = ecode
1594     # first complete the msg
1595     if args:
1596       msg = msg % args
1597     # then format the whole message
1598     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1599       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1600     else:
1601       if item:
1602         item = " " + item
1603       else:
1604         item = ""
1605       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1606     # and finally report it via the feedback_fn
1607     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1608
1609   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1610     """Log an error message if the passed condition is True.
1611
1612     """
1613     cond = (bool(cond)
1614             or self.op.debug_simulate_errors) # pylint: disable=E1101
1615
1616     # If the error code is in the list of ignored errors, demote the error to a
1617     # warning
1618     (_, etxt, _) = ecode
1619     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1620       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1621
1622     if cond:
1623       self._Error(ecode, *args, **kwargs)
1624
1625     # do not mark the operation as failed for WARN cases only
1626     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1627       self.bad = self.bad or cond
1628
1629
1630 class LUClusterVerify(NoHooksLU):
1631   """Submits all jobs necessary to verify the cluster.
1632
1633   """
1634   REQ_BGL = False
1635
1636   def ExpandNames(self):
1637     self.needed_locks = {}
1638
1639   def Exec(self, feedback_fn):
1640     jobs = []
1641
1642     if self.op.group_name:
1643       groups = [self.op.group_name]
1644       depends_fn = lambda: None
1645     else:
1646       groups = self.cfg.GetNodeGroupList()
1647
1648       # Verify global configuration
1649       jobs.append([
1650         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1651         ])
1652
1653       # Always depend on global verification
1654       depends_fn = lambda: [(-len(jobs), [])]
1655
1656     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1657                                             ignore_errors=self.op.ignore_errors,
1658                                             depends=depends_fn())]
1659                 for group in groups)
1660
1661     # Fix up all parameters
1662     for op in itertools.chain(*jobs): # pylint: disable=W0142
1663       op.debug_simulate_errors = self.op.debug_simulate_errors
1664       op.verbose = self.op.verbose
1665       op.error_codes = self.op.error_codes
1666       try:
1667         op.skip_checks = self.op.skip_checks
1668       except AttributeError:
1669         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1670
1671     return ResultWithJobs(jobs)
1672
1673
1674 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1675   """Verifies the cluster config.
1676
1677   """
1678   REQ_BGL = True
1679
1680   def _VerifyHVP(self, hvp_data):
1681     """Verifies locally the syntax of the hypervisor parameters.
1682
1683     """
1684     for item, hv_name, hv_params in hvp_data:
1685       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1686              (item, hv_name))
1687       try:
1688         hv_class = hypervisor.GetHypervisor(hv_name)
1689         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1690         hv_class.CheckParameterSyntax(hv_params)
1691       except errors.GenericError, err:
1692         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1693
1694   def ExpandNames(self):
1695     # Information can be safely retrieved as the BGL is acquired in exclusive
1696     # mode
1697     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1698     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1699     self.all_node_info = self.cfg.GetAllNodesInfo()
1700     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1701     self.needed_locks = {}
1702
1703   def Exec(self, feedback_fn):
1704     """Verify integrity of cluster, performing various test on nodes.
1705
1706     """
1707     self.bad = False
1708     self._feedback_fn = feedback_fn
1709
1710     feedback_fn("* Verifying cluster config")
1711
1712     for msg in self.cfg.VerifyConfig():
1713       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1714
1715     feedback_fn("* Verifying cluster certificate files")
1716
1717     for cert_filename in constants.ALL_CERT_FILES:
1718       (errcode, msg) = _VerifyCertificate(cert_filename)
1719       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1720
1721     feedback_fn("* Verifying hypervisor parameters")
1722
1723     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1724                                                 self.all_inst_info.values()))
1725
1726     feedback_fn("* Verifying all nodes belong to an existing group")
1727
1728     # We do this verification here because, should this bogus circumstance
1729     # occur, it would never be caught by VerifyGroup, which only acts on
1730     # nodes/instances reachable from existing node groups.
1731
1732     dangling_nodes = set(node.name for node in self.all_node_info.values()
1733                          if node.group not in self.all_group_info)
1734
1735     dangling_instances = {}
1736     no_node_instances = []
1737
1738     for inst in self.all_inst_info.values():
1739       if inst.primary_node in dangling_nodes:
1740         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1741       elif inst.primary_node not in self.all_node_info:
1742         no_node_instances.append(inst.name)
1743
1744     pretty_dangling = [
1745         "%s (%s)" %
1746         (node.name,
1747          utils.CommaJoin(dangling_instances.get(node.name,
1748                                                 ["no instances"])))
1749         for node in dangling_nodes]
1750
1751     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1752                   None,
1753                   "the following nodes (and their instances) belong to a non"
1754                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1755
1756     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1757                   None,
1758                   "the following instances have a non-existing primary-node:"
1759                   " %s", utils.CommaJoin(no_node_instances))
1760
1761     return not self.bad
1762
1763
1764 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1765   """Verifies the status of a node group.
1766
1767   """
1768   HPATH = "cluster-verify"
1769   HTYPE = constants.HTYPE_CLUSTER
1770   REQ_BGL = False
1771
1772   _HOOKS_INDENT_RE = re.compile("^", re.M)
1773
1774   class NodeImage(object):
1775     """A class representing the logical and physical status of a node.
1776
1777     @type name: string
1778     @ivar name: the node name to which this object refers
1779     @ivar volumes: a structure as returned from
1780         L{ganeti.backend.GetVolumeList} (runtime)
1781     @ivar instances: a list of running instances (runtime)
1782     @ivar pinst: list of configured primary instances (config)
1783     @ivar sinst: list of configured secondary instances (config)
1784     @ivar sbp: dictionary of {primary-node: list of instances} for all
1785         instances for which this node is secondary (config)
1786     @ivar mfree: free memory, as reported by hypervisor (runtime)
1787     @ivar dfree: free disk, as reported by the node (runtime)
1788     @ivar offline: the offline status (config)
1789     @type rpc_fail: boolean
1790     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1791         not whether the individual keys were correct) (runtime)
1792     @type lvm_fail: boolean
1793     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1794     @type hyp_fail: boolean
1795     @ivar hyp_fail: whether the RPC call didn't return the instance list
1796     @type ghost: boolean
1797     @ivar ghost: whether this is a known node or not (config)
1798     @type os_fail: boolean
1799     @ivar os_fail: whether the RPC call didn't return valid OS data
1800     @type oslist: list
1801     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1802     @type vm_capable: boolean
1803     @ivar vm_capable: whether the node can host instances
1804
1805     """
1806     def __init__(self, offline=False, name=None, vm_capable=True):
1807       self.name = name
1808       self.volumes = {}
1809       self.instances = []
1810       self.pinst = []
1811       self.sinst = []
1812       self.sbp = {}
1813       self.mfree = 0
1814       self.dfree = 0
1815       self.offline = offline
1816       self.vm_capable = vm_capable
1817       self.rpc_fail = False
1818       self.lvm_fail = False
1819       self.hyp_fail = False
1820       self.ghost = False
1821       self.os_fail = False
1822       self.oslist = {}
1823
1824   def ExpandNames(self):
1825     # This raises errors.OpPrereqError on its own:
1826     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1827
1828     # Get instances in node group; this is unsafe and needs verification later
1829     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1830
1831     self.needed_locks = {
1832       locking.LEVEL_INSTANCE: inst_names,
1833       locking.LEVEL_NODEGROUP: [self.group_uuid],
1834       locking.LEVEL_NODE: [],
1835       }
1836
1837     self.share_locks = _ShareAll()
1838
1839   def DeclareLocks(self, level):
1840     if level == locking.LEVEL_NODE:
1841       # Get members of node group; this is unsafe and needs verification later
1842       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1843
1844       all_inst_info = self.cfg.GetAllInstancesInfo()
1845
1846       # In Exec(), we warn about mirrored instances that have primary and
1847       # secondary living in separate node groups. To fully verify that
1848       # volumes for these instances are healthy, we will need to do an
1849       # extra call to their secondaries. We ensure here those nodes will
1850       # be locked.
1851       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1852         # Important: access only the instances whose lock is owned
1853         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1854           nodes.update(all_inst_info[inst].secondary_nodes)
1855
1856       self.needed_locks[locking.LEVEL_NODE] = nodes
1857
1858   def CheckPrereq(self):
1859     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1860     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1861
1862     group_nodes = set(self.group_info.members)
1863     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1864
1865     unlocked_nodes = \
1866         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1867
1868     unlocked_instances = \
1869         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1870
1871     if unlocked_nodes:
1872       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1873                                  utils.CommaJoin(unlocked_nodes))
1874
1875     if unlocked_instances:
1876       raise errors.OpPrereqError("Missing lock for instances: %s" %
1877                                  utils.CommaJoin(unlocked_instances))
1878
1879     self.all_node_info = self.cfg.GetAllNodesInfo()
1880     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1881
1882     self.my_node_names = utils.NiceSort(group_nodes)
1883     self.my_inst_names = utils.NiceSort(group_instances)
1884
1885     self.my_node_info = dict((name, self.all_node_info[name])
1886                              for name in self.my_node_names)
1887
1888     self.my_inst_info = dict((name, self.all_inst_info[name])
1889                              for name in self.my_inst_names)
1890
1891     # We detect here the nodes that will need the extra RPC calls for verifying
1892     # split LV volumes; they should be locked.
1893     extra_lv_nodes = set()
1894
1895     for inst in self.my_inst_info.values():
1896       if inst.disk_template in constants.DTS_INT_MIRROR:
1897         group = self.my_node_info[inst.primary_node].group
1898         for nname in inst.secondary_nodes:
1899           if self.all_node_info[nname].group != group:
1900             extra_lv_nodes.add(nname)
1901
1902     unlocked_lv_nodes = \
1903         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1904
1905     if unlocked_lv_nodes:
1906       raise errors.OpPrereqError("these nodes could be locked: %s" %
1907                                  utils.CommaJoin(unlocked_lv_nodes))
1908     self.extra_lv_nodes = list(extra_lv_nodes)
1909
1910   def _VerifyNode(self, ninfo, nresult):
1911     """Perform some basic validation on data returned from a node.
1912
1913       - check the result data structure is well formed and has all the
1914         mandatory fields
1915       - check ganeti version
1916
1917     @type ninfo: L{objects.Node}
1918     @param ninfo: the node to check
1919     @param nresult: the results from the node
1920     @rtype: boolean
1921     @return: whether overall this call was successful (and we can expect
1922          reasonable values in the respose)
1923
1924     """
1925     node = ninfo.name
1926     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1927
1928     # main result, nresult should be a non-empty dict
1929     test = not nresult or not isinstance(nresult, dict)
1930     _ErrorIf(test, constants.CV_ENODERPC, node,
1931                   "unable to verify node: no data returned")
1932     if test:
1933       return False
1934
1935     # compares ganeti version
1936     local_version = constants.PROTOCOL_VERSION
1937     remote_version = nresult.get("version", None)
1938     test = not (remote_version and
1939                 isinstance(remote_version, (list, tuple)) and
1940                 len(remote_version) == 2)
1941     _ErrorIf(test, constants.CV_ENODERPC, node,
1942              "connection to node returned invalid data")
1943     if test:
1944       return False
1945
1946     test = local_version != remote_version[0]
1947     _ErrorIf(test, constants.CV_ENODEVERSION, node,
1948              "incompatible protocol versions: master %s,"
1949              " node %s", local_version, remote_version[0])
1950     if test:
1951       return False
1952
1953     # node seems compatible, we can actually try to look into its results
1954
1955     # full package version
1956     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1957                   constants.CV_ENODEVERSION, node,
1958                   "software version mismatch: master %s, node %s",
1959                   constants.RELEASE_VERSION, remote_version[1],
1960                   code=self.ETYPE_WARNING)
1961
1962     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1963     if ninfo.vm_capable and isinstance(hyp_result, dict):
1964       for hv_name, hv_result in hyp_result.iteritems():
1965         test = hv_result is not None
1966         _ErrorIf(test, constants.CV_ENODEHV, node,
1967                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1968
1969     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1970     if ninfo.vm_capable and isinstance(hvp_result, list):
1971       for item, hv_name, hv_result in hvp_result:
1972         _ErrorIf(True, constants.CV_ENODEHV, node,
1973                  "hypervisor %s parameter verify failure (source %s): %s",
1974                  hv_name, item, hv_result)
1975
1976     test = nresult.get(constants.NV_NODESETUP,
1977                        ["Missing NODESETUP results"])
1978     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1979              "; ".join(test))
1980
1981     return True
1982
1983   def _VerifyNodeTime(self, ninfo, nresult,
1984                       nvinfo_starttime, nvinfo_endtime):
1985     """Check the node time.
1986
1987     @type ninfo: L{objects.Node}
1988     @param ninfo: the node to check
1989     @param nresult: the remote results for the node
1990     @param nvinfo_starttime: the start time of the RPC call
1991     @param nvinfo_endtime: the end time of the RPC call
1992
1993     """
1994     node = ninfo.name
1995     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1996
1997     ntime = nresult.get(constants.NV_TIME, None)
1998     try:
1999       ntime_merged = utils.MergeTime(ntime)
2000     except (ValueError, TypeError):
2001       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2002       return
2003
2004     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2005       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2006     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2007       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2008     else:
2009       ntime_diff = None
2010
2011     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2012              "Node time diverges by at least %s from master node time",
2013              ntime_diff)
2014
2015   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2016     """Check the node LVM results.
2017
2018     @type ninfo: L{objects.Node}
2019     @param ninfo: the node to check
2020     @param nresult: the remote results for the node
2021     @param vg_name: the configured VG name
2022
2023     """
2024     if vg_name is None:
2025       return
2026
2027     node = ninfo.name
2028     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2029
2030     # checks vg existence and size > 20G
2031     vglist = nresult.get(constants.NV_VGLIST, None)
2032     test = not vglist
2033     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2034     if not test:
2035       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2036                                             constants.MIN_VG_SIZE)
2037       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2038
2039     # check pv names
2040     pvlist = nresult.get(constants.NV_PVLIST, None)
2041     test = pvlist is None
2042     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2043     if not test:
2044       # check that ':' is not present in PV names, since it's a
2045       # special character for lvcreate (denotes the range of PEs to
2046       # use on the PV)
2047       for _, pvname, owner_vg in pvlist:
2048         test = ":" in pvname
2049         _ErrorIf(test, constants.CV_ENODELVM, node,
2050                  "Invalid character ':' in PV '%s' of VG '%s'",
2051                  pvname, owner_vg)
2052
2053   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2054     """Check the node bridges.
2055
2056     @type ninfo: L{objects.Node}
2057     @param ninfo: the node to check
2058     @param nresult: the remote results for the node
2059     @param bridges: the expected list of bridges
2060
2061     """
2062     if not bridges:
2063       return
2064
2065     node = ninfo.name
2066     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2067
2068     missing = nresult.get(constants.NV_BRIDGES, None)
2069     test = not isinstance(missing, list)
2070     _ErrorIf(test, constants.CV_ENODENET, node,
2071              "did not return valid bridge information")
2072     if not test:
2073       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2074                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2075
2076   def _VerifyNodeUserScripts(self, ninfo, nresult):
2077     """Check the results of user scripts presence and executability on the node
2078
2079     @type ninfo: L{objects.Node}
2080     @param ninfo: the node to check
2081     @param nresult: the remote results for the node
2082
2083     """
2084     node = ninfo.name
2085
2086     test = not constants.NV_USERSCRIPTS in nresult
2087     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2088                   "did not return user scripts information")
2089
2090     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2091     if not test:
2092       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2093                     "user scripts not present or not executable: %s" %
2094                     utils.CommaJoin(sorted(broken_scripts)))
2095
2096   def _VerifyNodeNetwork(self, ninfo, nresult):
2097     """Check the node network connectivity results.
2098
2099     @type ninfo: L{objects.Node}
2100     @param ninfo: the node to check
2101     @param nresult: the remote results for the node
2102
2103     """
2104     node = ninfo.name
2105     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2106
2107     test = constants.NV_NODELIST not in nresult
2108     _ErrorIf(test, constants.CV_ENODESSH, node,
2109              "node hasn't returned node ssh connectivity data")
2110     if not test:
2111       if nresult[constants.NV_NODELIST]:
2112         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2113           _ErrorIf(True, constants.CV_ENODESSH, node,
2114                    "ssh communication with node '%s': %s", a_node, a_msg)
2115
2116     test = constants.NV_NODENETTEST not in nresult
2117     _ErrorIf(test, constants.CV_ENODENET, node,
2118              "node hasn't returned node tcp connectivity data")
2119     if not test:
2120       if nresult[constants.NV_NODENETTEST]:
2121         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2122         for anode in nlist:
2123           _ErrorIf(True, constants.CV_ENODENET, node,
2124                    "tcp communication with node '%s': %s",
2125                    anode, nresult[constants.NV_NODENETTEST][anode])
2126
2127     test = constants.NV_MASTERIP not in nresult
2128     _ErrorIf(test, constants.CV_ENODENET, node,
2129              "node hasn't returned node master IP reachability data")
2130     if not test:
2131       if not nresult[constants.NV_MASTERIP]:
2132         if node == self.master_node:
2133           msg = "the master node cannot reach the master IP (not configured?)"
2134         else:
2135           msg = "cannot reach the master IP"
2136         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2137
2138   def _VerifyInstancePolicy(self, instance):
2139     """Verify instance specs against instance policy set on node group level.
2140
2141
2142     """
2143     cluster = self.cfg.GetClusterInfo()
2144     full_beparams = cluster.FillBE(instance)
2145     ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2146
2147     mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2148     cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2149     disk_count = len(instance.disks)
2150     disk_sizes = [disk.size for disk in instance.disks]
2151     nic_count = len(instance.nics)
2152
2153     test_settings = [
2154       (constants.ISPEC_MEM_SIZE, mem_size),
2155       (constants.ISPEC_CPU_COUNT, cpu_count),
2156       (constants.ISPEC_DISK_COUNT, disk_count),
2157       (constants.ISPEC_NIC_COUNT, nic_count),
2158       ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
2159
2160     for (name, value) in test_settings:
2161       test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2162       self._ErrorIf(test_result is not None,
2163                     constants.CV_EINSTANCEPOLICY, instance.name,
2164                     test_result)
2165
2166   def _VerifyInstance(self, instance, instanceconfig, node_image,
2167                       diskstatus):
2168     """Verify an instance.
2169
2170     This function checks to see if the required block devices are
2171     available on the instance's node.
2172
2173     """
2174     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2175     node_current = instanceconfig.primary_node
2176
2177     node_vol_should = {}
2178     instanceconfig.MapLVsByNode(node_vol_should)
2179
2180     self._VerifyInstancePolicy(instanceconfig)
2181
2182     for node in node_vol_should:
2183       n_img = node_image[node]
2184       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2185         # ignore missing volumes on offline or broken nodes
2186         continue
2187       for volume in node_vol_should[node]:
2188         test = volume not in n_img.volumes
2189         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2190                  "volume %s missing on node %s", volume, node)
2191
2192     if instanceconfig.admin_state == constants.ADMINST_UP:
2193       pri_img = node_image[node_current]
2194       test = instance not in pri_img.instances and not pri_img.offline
2195       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2196                "instance not running on its primary node %s",
2197                node_current)
2198
2199     diskdata = [(nname, success, status, idx)
2200                 for (nname, disks) in diskstatus.items()
2201                 for idx, (success, status) in enumerate(disks)]
2202
2203     for nname, success, bdev_status, idx in diskdata:
2204       # the 'ghost node' construction in Exec() ensures that we have a
2205       # node here
2206       snode = node_image[nname]
2207       bad_snode = snode.ghost or snode.offline
2208       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2209                not success and not bad_snode,
2210                constants.CV_EINSTANCEFAULTYDISK, instance,
2211                "couldn't retrieve status for disk/%s on %s: %s",
2212                idx, nname, bdev_status)
2213       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2214                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2215                constants.CV_EINSTANCEFAULTYDISK, instance,
2216                "disk/%s on %s is faulty", idx, nname)
2217
2218   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2219     """Verify if there are any unknown volumes in the cluster.
2220
2221     The .os, .swap and backup volumes are ignored. All other volumes are
2222     reported as unknown.
2223
2224     @type reserved: L{ganeti.utils.FieldSet}
2225     @param reserved: a FieldSet of reserved volume names
2226
2227     """
2228     for node, n_img in node_image.items():
2229       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2230         # skip non-healthy nodes
2231         continue
2232       for volume in n_img.volumes:
2233         test = ((node not in node_vol_should or
2234                 volume not in node_vol_should[node]) and
2235                 not reserved.Matches(volume))
2236         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2237                       "volume %s is unknown", volume)
2238
2239   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2240     """Verify N+1 Memory Resilience.
2241
2242     Check that if one single node dies we can still start all the
2243     instances it was primary for.
2244
2245     """
2246     cluster_info = self.cfg.GetClusterInfo()
2247     for node, n_img in node_image.items():
2248       # This code checks that every node which is now listed as
2249       # secondary has enough memory to host all instances it is
2250       # supposed to should a single other node in the cluster fail.
2251       # FIXME: not ready for failover to an arbitrary node
2252       # FIXME: does not support file-backed instances
2253       # WARNING: we currently take into account down instances as well
2254       # as up ones, considering that even if they're down someone
2255       # might want to start them even in the event of a node failure.
2256       if n_img.offline:
2257         # we're skipping offline nodes from the N+1 warning, since
2258         # most likely we don't have good memory infromation from them;
2259         # we already list instances living on such nodes, and that's
2260         # enough warning
2261         continue
2262       #TODO(dynmem): use MINMEM for checking
2263       #TODO(dynmem): also consider ballooning out other instances
2264       for prinode, instances in n_img.sbp.items():
2265         needed_mem = 0
2266         for instance in instances:
2267           bep = cluster_info.FillBE(instance_cfg[instance])
2268           if bep[constants.BE_AUTO_BALANCE]:
2269             needed_mem += bep[constants.BE_MAXMEM]
2270         test = n_img.mfree < needed_mem
2271         self._ErrorIf(test, constants.CV_ENODEN1, node,
2272                       "not enough memory to accomodate instance failovers"
2273                       " should node %s fail (%dMiB needed, %dMiB available)",
2274                       prinode, needed_mem, n_img.mfree)
2275
2276   @classmethod
2277   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2278                    (files_all, files_opt, files_mc, files_vm)):
2279     """Verifies file checksums collected from all nodes.
2280
2281     @param errorif: Callback for reporting errors
2282     @param nodeinfo: List of L{objects.Node} objects
2283     @param master_node: Name of master node
2284     @param all_nvinfo: RPC results
2285
2286     """
2287     # Define functions determining which nodes to consider for a file
2288     files2nodefn = [
2289       (files_all, None),
2290       (files_mc, lambda node: (node.master_candidate or
2291                                node.name == master_node)),
2292       (files_vm, lambda node: node.vm_capable),
2293       ]
2294
2295     # Build mapping from filename to list of nodes which should have the file
2296     nodefiles = {}
2297     for (files, fn) in files2nodefn:
2298       if fn is None:
2299         filenodes = nodeinfo
2300       else:
2301         filenodes = filter(fn, nodeinfo)
2302       nodefiles.update((filename,
2303                         frozenset(map(operator.attrgetter("name"), filenodes)))
2304                        for filename in files)
2305
2306     assert set(nodefiles) == (files_all | files_mc | files_vm)
2307
2308     fileinfo = dict((filename, {}) for filename in nodefiles)
2309     ignore_nodes = set()
2310
2311     for node in nodeinfo:
2312       if node.offline:
2313         ignore_nodes.add(node.name)
2314         continue
2315
2316       nresult = all_nvinfo[node.name]
2317
2318       if nresult.fail_msg or not nresult.payload:
2319         node_files = None
2320       else:
2321         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2322
2323       test = not (node_files and isinstance(node_files, dict))
2324       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2325               "Node did not return file checksum data")
2326       if test:
2327         ignore_nodes.add(node.name)
2328         continue
2329
2330       # Build per-checksum mapping from filename to nodes having it
2331       for (filename, checksum) in node_files.items():
2332         assert filename in nodefiles
2333         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2334
2335     for (filename, checksums) in fileinfo.items():
2336       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2337
2338       # Nodes having the file
2339       with_file = frozenset(node_name
2340                             for nodes in fileinfo[filename].values()
2341                             for node_name in nodes) - ignore_nodes
2342
2343       expected_nodes = nodefiles[filename] - ignore_nodes
2344
2345       # Nodes missing file
2346       missing_file = expected_nodes - with_file
2347
2348       if filename in files_opt:
2349         # All or no nodes
2350         errorif(missing_file and missing_file != expected_nodes,
2351                 constants.CV_ECLUSTERFILECHECK, None,
2352                 "File %s is optional, but it must exist on all or no"
2353                 " nodes (not found on %s)",
2354                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2355       else:
2356         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2357                 "File %s is missing from node(s) %s", filename,
2358                 utils.CommaJoin(utils.NiceSort(missing_file)))
2359
2360         # Warn if a node has a file it shouldn't
2361         unexpected = with_file - expected_nodes
2362         errorif(unexpected,
2363                 constants.CV_ECLUSTERFILECHECK, None,
2364                 "File %s should not exist on node(s) %s",
2365                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2366
2367       # See if there are multiple versions of the file
2368       test = len(checksums) > 1
2369       if test:
2370         variants = ["variant %s on %s" %
2371                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2372                     for (idx, (checksum, nodes)) in
2373                       enumerate(sorted(checksums.items()))]
2374       else:
2375         variants = []
2376
2377       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2378               "File %s found with %s different checksums (%s)",
2379               filename, len(checksums), "; ".join(variants))
2380
2381   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2382                       drbd_map):
2383     """Verifies and the node DRBD status.
2384
2385     @type ninfo: L{objects.Node}
2386     @param ninfo: the node to check
2387     @param nresult: the remote results for the node
2388     @param instanceinfo: the dict of instances
2389     @param drbd_helper: the configured DRBD usermode helper
2390     @param drbd_map: the DRBD map as returned by
2391         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2392
2393     """
2394     node = ninfo.name
2395     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2396
2397     if drbd_helper:
2398       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2399       test = (helper_result == None)
2400       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2401                "no drbd usermode helper returned")
2402       if helper_result:
2403         status, payload = helper_result
2404         test = not status
2405         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2406                  "drbd usermode helper check unsuccessful: %s", payload)
2407         test = status and (payload != drbd_helper)
2408         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2409                  "wrong drbd usermode helper: %s", payload)
2410
2411     # compute the DRBD minors
2412     node_drbd = {}
2413     for minor, instance in drbd_map[node].items():
2414       test = instance not in instanceinfo
2415       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2416                "ghost instance '%s' in temporary DRBD map", instance)
2417         # ghost instance should not be running, but otherwise we
2418         # don't give double warnings (both ghost instance and
2419         # unallocated minor in use)
2420       if test:
2421         node_drbd[minor] = (instance, False)
2422       else:
2423         instance = instanceinfo[instance]
2424         node_drbd[minor] = (instance.name,
2425                             instance.admin_state == constants.ADMINST_UP)
2426
2427     # and now check them
2428     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2429     test = not isinstance(used_minors, (tuple, list))
2430     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2431              "cannot parse drbd status file: %s", str(used_minors))
2432     if test:
2433       # we cannot check drbd status
2434       return
2435
2436     for minor, (iname, must_exist) in node_drbd.items():
2437       test = minor not in used_minors and must_exist
2438       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2439                "drbd minor %d of instance %s is not active", minor, iname)
2440     for minor in used_minors:
2441       test = minor not in node_drbd
2442       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2443                "unallocated drbd minor %d is in use", minor)
2444
2445   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2446     """Builds the node OS structures.
2447
2448     @type ninfo: L{objects.Node}
2449     @param ninfo: the node to check
2450     @param nresult: the remote results for the node
2451     @param nimg: the node image object
2452
2453     """
2454     node = ninfo.name
2455     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2456
2457     remote_os = nresult.get(constants.NV_OSLIST, None)
2458     test = (not isinstance(remote_os, list) or
2459             not compat.all(isinstance(v, list) and len(v) == 7
2460                            for v in remote_os))
2461
2462     _ErrorIf(test, constants.CV_ENODEOS, node,
2463              "node hasn't returned valid OS data")
2464
2465     nimg.os_fail = test
2466
2467     if test:
2468       return
2469
2470     os_dict = {}
2471
2472     for (name, os_path, status, diagnose,
2473          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2474
2475       if name not in os_dict:
2476         os_dict[name] = []
2477
2478       # parameters is a list of lists instead of list of tuples due to
2479       # JSON lacking a real tuple type, fix it:
2480       parameters = [tuple(v) for v in parameters]
2481       os_dict[name].append((os_path, status, diagnose,
2482                             set(variants), set(parameters), set(api_ver)))
2483
2484     nimg.oslist = os_dict
2485
2486   def _VerifyNodeOS(self, ninfo, nimg, base):
2487     """Verifies the node OS list.
2488
2489     @type ninfo: L{objects.Node}
2490     @param ninfo: the node to check
2491     @param nimg: the node image object
2492     @param base: the 'template' node we match against (e.g. from the master)
2493
2494     """
2495     node = ninfo.name
2496     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2497
2498     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2499
2500     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2501     for os_name, os_data in nimg.oslist.items():
2502       assert os_data, "Empty OS status for OS %s?!" % os_name
2503       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2504       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2505                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2506       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2507                "OS '%s' has multiple entries (first one shadows the rest): %s",
2508                os_name, utils.CommaJoin([v[0] for v in os_data]))
2509       # comparisons with the 'base' image
2510       test = os_name not in base.oslist
2511       _ErrorIf(test, constants.CV_ENODEOS, node,
2512                "Extra OS %s not present on reference node (%s)",
2513                os_name, base.name)
2514       if test:
2515         continue
2516       assert base.oslist[os_name], "Base node has empty OS status?"
2517       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2518       if not b_status:
2519         # base OS is invalid, skipping
2520         continue
2521       for kind, a, b in [("API version", f_api, b_api),
2522                          ("variants list", f_var, b_var),
2523                          ("parameters", beautify_params(f_param),
2524                           beautify_params(b_param))]:
2525         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2526                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2527                  kind, os_name, base.name,
2528                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2529
2530     # check any missing OSes
2531     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2532     _ErrorIf(missing, constants.CV_ENODEOS, node,
2533              "OSes present on reference node %s but missing on this node: %s",
2534              base.name, utils.CommaJoin(missing))
2535
2536   def _VerifyOob(self, ninfo, nresult):
2537     """Verifies out of band functionality of a node.
2538
2539     @type ninfo: L{objects.Node}
2540     @param ninfo: the node to check
2541     @param nresult: the remote results for the node
2542
2543     """
2544     node = ninfo.name
2545     # We just have to verify the paths on master and/or master candidates
2546     # as the oob helper is invoked on the master
2547     if ((ninfo.master_candidate or ninfo.master_capable) and
2548         constants.NV_OOB_PATHS in nresult):
2549       for path_result in nresult[constants.NV_OOB_PATHS]:
2550         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2551
2552   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2553     """Verifies and updates the node volume data.
2554
2555     This function will update a L{NodeImage}'s internal structures
2556     with data from the remote call.
2557
2558     @type ninfo: L{objects.Node}
2559     @param ninfo: the node to check
2560     @param nresult: the remote results for the node
2561     @param nimg: the node image object
2562     @param vg_name: the configured VG name
2563
2564     """
2565     node = ninfo.name
2566     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2567
2568     nimg.lvm_fail = True
2569     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2570     if vg_name is None:
2571       pass
2572     elif isinstance(lvdata, basestring):
2573       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2574                utils.SafeEncode(lvdata))
2575     elif not isinstance(lvdata, dict):
2576       _ErrorIf(True, constants.CV_ENODELVM, node,
2577                "rpc call to node failed (lvlist)")
2578     else:
2579       nimg.volumes = lvdata
2580       nimg.lvm_fail = False
2581
2582   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2583     """Verifies and updates the node instance list.
2584
2585     If the listing was successful, then updates this node's instance
2586     list. Otherwise, it marks the RPC call as failed for the instance
2587     list key.
2588
2589     @type ninfo: L{objects.Node}
2590     @param ninfo: the node to check
2591     @param nresult: the remote results for the node
2592     @param nimg: the node image object
2593
2594     """
2595     idata = nresult.get(constants.NV_INSTANCELIST, None)
2596     test = not isinstance(idata, list)
2597     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2598                   "rpc call to node failed (instancelist): %s",
2599                   utils.SafeEncode(str(idata)))
2600     if test:
2601       nimg.hyp_fail = True
2602     else:
2603       nimg.instances = idata
2604
2605   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2606     """Verifies and computes a node information map
2607
2608     @type ninfo: L{objects.Node}
2609     @param ninfo: the node to check
2610     @param nresult: the remote results for the node
2611     @param nimg: the node image object
2612     @param vg_name: the configured VG name
2613
2614     """
2615     node = ninfo.name
2616     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2617
2618     # try to read free memory (from the hypervisor)
2619     hv_info = nresult.get(constants.NV_HVINFO, None)
2620     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2621     _ErrorIf(test, constants.CV_ENODEHV, node,
2622              "rpc call to node failed (hvinfo)")
2623     if not test:
2624       try:
2625         nimg.mfree = int(hv_info["memory_free"])
2626       except (ValueError, TypeError):
2627         _ErrorIf(True, constants.CV_ENODERPC, node,
2628                  "node returned invalid nodeinfo, check hypervisor")
2629
2630     # FIXME: devise a free space model for file based instances as well
2631     if vg_name is not None:
2632       test = (constants.NV_VGLIST not in nresult or
2633               vg_name not in nresult[constants.NV_VGLIST])
2634       _ErrorIf(test, constants.CV_ENODELVM, node,
2635                "node didn't return data for the volume group '%s'"
2636                " - it is either missing or broken", vg_name)
2637       if not test:
2638         try:
2639           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2640         except (ValueError, TypeError):
2641           _ErrorIf(True, constants.CV_ENODERPC, node,
2642                    "node returned invalid LVM info, check LVM status")
2643
2644   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2645     """Gets per-disk status information for all instances.
2646
2647     @type nodelist: list of strings
2648     @param nodelist: Node names
2649     @type node_image: dict of (name, L{objects.Node})
2650     @param node_image: Node objects
2651     @type instanceinfo: dict of (name, L{objects.Instance})
2652     @param instanceinfo: Instance objects
2653     @rtype: {instance: {node: [(succes, payload)]}}
2654     @return: a dictionary of per-instance dictionaries with nodes as
2655         keys and disk information as values; the disk information is a
2656         list of tuples (success, payload)
2657
2658     """
2659     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2660
2661     node_disks = {}
2662     node_disks_devonly = {}
2663     diskless_instances = set()
2664     diskless = constants.DT_DISKLESS
2665
2666     for nname in nodelist:
2667       node_instances = list(itertools.chain(node_image[nname].pinst,
2668                                             node_image[nname].sinst))
2669       diskless_instances.update(inst for inst in node_instances
2670                                 if instanceinfo[inst].disk_template == diskless)
2671       disks = [(inst, disk)
2672                for inst in node_instances
2673                for disk in instanceinfo[inst].disks]
2674
2675       if not disks:
2676         # No need to collect data
2677         continue
2678
2679       node_disks[nname] = disks
2680
2681       # Creating copies as SetDiskID below will modify the objects and that can
2682       # lead to incorrect data returned from nodes
2683       devonly = [dev.Copy() for (_, dev) in disks]
2684
2685       for dev in devonly:
2686         self.cfg.SetDiskID(dev, nname)
2687
2688       node_disks_devonly[nname] = devonly
2689
2690     assert len(node_disks) == len(node_disks_devonly)
2691
2692     # Collect data from all nodes with disks
2693     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2694                                                           node_disks_devonly)
2695
2696     assert len(result) == len(node_disks)
2697
2698     instdisk = {}
2699
2700     for (nname, nres) in result.items():
2701       disks = node_disks[nname]
2702
2703       if nres.offline:
2704         # No data from this node
2705         data = len(disks) * [(False, "node offline")]
2706       else:
2707         msg = nres.fail_msg
2708         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2709                  "while getting disk information: %s", msg)
2710         if msg:
2711           # No data from this node
2712           data = len(disks) * [(False, msg)]
2713         else:
2714           data = []
2715           for idx, i in enumerate(nres.payload):
2716             if isinstance(i, (tuple, list)) and len(i) == 2:
2717               data.append(i)
2718             else:
2719               logging.warning("Invalid result from node %s, entry %d: %s",
2720                               nname, idx, i)
2721               data.append((False, "Invalid result from the remote node"))
2722
2723       for ((inst, _), status) in zip(disks, data):
2724         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2725
2726     # Add empty entries for diskless instances.
2727     for inst in diskless_instances:
2728       assert inst not in instdisk
2729       instdisk[inst] = {}
2730
2731     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2732                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2733                       compat.all(isinstance(s, (tuple, list)) and
2734                                  len(s) == 2 for s in statuses)
2735                       for inst, nnames in instdisk.items()
2736                       for nname, statuses in nnames.items())
2737     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2738
2739     return instdisk
2740
2741   @staticmethod
2742   def _SshNodeSelector(group_uuid, all_nodes):
2743     """Create endless iterators for all potential SSH check hosts.
2744
2745     """
2746     nodes = [node for node in all_nodes
2747              if (node.group != group_uuid and
2748                  not node.offline)]
2749     keyfunc = operator.attrgetter("group")
2750
2751     return map(itertools.cycle,
2752                [sorted(map(operator.attrgetter("name"), names))
2753                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2754                                                   keyfunc)])
2755
2756   @classmethod
2757   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2758     """Choose which nodes should talk to which other nodes.
2759
2760     We will make nodes contact all nodes in their group, and one node from
2761     every other group.
2762
2763     @warning: This algorithm has a known issue if one node group is much
2764       smaller than others (e.g. just one node). In such a case all other
2765       nodes will talk to the single node.
2766
2767     """
2768     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2769     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2770
2771     return (online_nodes,
2772             dict((name, sorted([i.next() for i in sel]))
2773                  for name in online_nodes))
2774
2775   def BuildHooksEnv(self):
2776     """Build hooks env.
2777
2778     Cluster-Verify hooks just ran in the post phase and their failure makes
2779     the output be logged in the verify output and the verification to fail.
2780
2781     """
2782     env = {
2783       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2784       }
2785
2786     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2787                for node in self.my_node_info.values())
2788
2789     return env
2790
2791   def BuildHooksNodes(self):
2792     """Build hooks nodes.
2793
2794     """
2795     return ([], self.my_node_names)
2796
2797   def Exec(self, feedback_fn):
2798     """Verify integrity of the node group, performing various test on nodes.
2799
2800     """
2801     # This method has too many local variables. pylint: disable=R0914
2802     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2803
2804     if not self.my_node_names:
2805       # empty node group
2806       feedback_fn("* Empty node group, skipping verification")
2807       return True
2808
2809     self.bad = False
2810     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2811     verbose = self.op.verbose
2812     self._feedback_fn = feedback_fn
2813
2814     vg_name = self.cfg.GetVGName()
2815     drbd_helper = self.cfg.GetDRBDHelper()
2816     cluster = self.cfg.GetClusterInfo()
2817     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2818     hypervisors = cluster.enabled_hypervisors
2819     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2820
2821     i_non_redundant = [] # Non redundant instances
2822     i_non_a_balanced = [] # Non auto-balanced instances
2823     i_offline = 0 # Count of offline instances
2824     n_offline = 0 # Count of offline nodes
2825     n_drained = 0 # Count of nodes being drained
2826     node_vol_should = {}
2827
2828     # FIXME: verify OS list
2829
2830     # File verification
2831     filemap = _ComputeAncillaryFiles(cluster, False)
2832
2833     # do local checksums
2834     master_node = self.master_node = self.cfg.GetMasterNode()
2835     master_ip = self.cfg.GetMasterIP()
2836
2837     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2838
2839     user_scripts = []
2840     if self.cfg.GetUseExternalMipScript():
2841       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2842
2843     node_verify_param = {
2844       constants.NV_FILELIST:
2845         utils.UniqueSequence(filename
2846                              for files in filemap
2847                              for filename in files),
2848       constants.NV_NODELIST:
2849         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2850                                   self.all_node_info.values()),
2851       constants.NV_HYPERVISOR: hypervisors,
2852       constants.NV_HVPARAMS:
2853         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2854       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2855                                  for node in node_data_list
2856                                  if not node.offline],
2857       constants.NV_INSTANCELIST: hypervisors,
2858       constants.NV_VERSION: None,
2859       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2860       constants.NV_NODESETUP: None,
2861       constants.NV_TIME: None,
2862       constants.NV_MASTERIP: (master_node, master_ip),
2863       constants.NV_OSLIST: None,
2864       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2865       constants.NV_USERSCRIPTS: user_scripts,
2866       }
2867
2868     if vg_name is not None:
2869       node_verify_param[constants.NV_VGLIST] = None
2870       node_verify_param[constants.NV_LVLIST] = vg_name
2871       node_verify_param[constants.NV_PVLIST] = [vg_name]
2872       node_verify_param[constants.NV_DRBDLIST] = None
2873
2874     if drbd_helper:
2875       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2876
2877     # bridge checks
2878     # FIXME: this needs to be changed per node-group, not cluster-wide
2879     bridges = set()
2880     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2881     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2882       bridges.add(default_nicpp[constants.NIC_LINK])
2883     for instance in self.my_inst_info.values():
2884       for nic in instance.nics:
2885         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2886         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2887           bridges.add(full_nic[constants.NIC_LINK])
2888
2889     if bridges:
2890       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2891
2892     # Build our expected cluster state
2893     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2894                                                  name=node.name,
2895                                                  vm_capable=node.vm_capable))
2896                       for node in node_data_list)
2897
2898     # Gather OOB paths
2899     oob_paths = []
2900     for node in self.all_node_info.values():
2901       path = _SupportsOob(self.cfg, node)
2902       if path and path not in oob_paths:
2903         oob_paths.append(path)
2904
2905     if oob_paths:
2906       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2907
2908     for instance in self.my_inst_names:
2909       inst_config = self.my_inst_info[instance]
2910
2911       for nname in inst_config.all_nodes:
2912         if nname not in node_image:
2913           gnode = self.NodeImage(name=nname)
2914           gnode.ghost = (nname not in self.all_node_info)
2915           node_image[nname] = gnode
2916
2917       inst_config.MapLVsByNode(node_vol_should)
2918
2919       pnode = inst_config.primary_node
2920       node_image[pnode].pinst.append(instance)
2921
2922       for snode in inst_config.secondary_nodes:
2923         nimg = node_image[snode]
2924         nimg.sinst.append(instance)
2925         if pnode not in nimg.sbp:
2926           nimg.sbp[pnode] = []
2927         nimg.sbp[pnode].append(instance)
2928
2929     # At this point, we have the in-memory data structures complete,
2930     # except for the runtime information, which we'll gather next
2931
2932     # Due to the way our RPC system works, exact response times cannot be
2933     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2934     # time before and after executing the request, we can at least have a time
2935     # window.
2936     nvinfo_starttime = time.time()
2937     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2938                                            node_verify_param,
2939                                            self.cfg.GetClusterName())
2940     nvinfo_endtime = time.time()
2941
2942     if self.extra_lv_nodes and vg_name is not None:
2943       extra_lv_nvinfo = \
2944           self.rpc.call_node_verify(self.extra_lv_nodes,
2945                                     {constants.NV_LVLIST: vg_name},
2946                                     self.cfg.GetClusterName())
2947     else:
2948       extra_lv_nvinfo = {}
2949
2950     all_drbd_map = self.cfg.ComputeDRBDMap()
2951
2952     feedback_fn("* Gathering disk information (%s nodes)" %
2953                 len(self.my_node_names))
2954     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2955                                      self.my_inst_info)
2956
2957     feedback_fn("* Verifying configuration file consistency")
2958
2959     # If not all nodes are being checked, we need to make sure the master node
2960     # and a non-checked vm_capable node are in the list.
2961     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2962     if absent_nodes:
2963       vf_nvinfo = all_nvinfo.copy()
2964       vf_node_info = list(self.my_node_info.values())
2965       additional_nodes = []
2966       if master_node not in self.my_node_info:
2967         additional_nodes.append(master_node)
2968         vf_node_info.append(self.all_node_info[master_node])
2969       # Add the first vm_capable node we find which is not included
2970       for node in absent_nodes:
2971         nodeinfo = self.all_node_info[node]
2972         if nodeinfo.vm_capable and not nodeinfo.offline:
2973           additional_nodes.append(node)
2974           vf_node_info.append(self.all_node_info[node])
2975           break
2976       key = constants.NV_FILELIST
2977       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2978                                                  {key: node_verify_param[key]},
2979                                                  self.cfg.GetClusterName()))
2980     else:
2981       vf_nvinfo = all_nvinfo
2982       vf_node_info = self.my_node_info.values()
2983
2984     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2985
2986     feedback_fn("* Verifying node status")
2987
2988     refos_img = None
2989
2990     for node_i in node_data_list:
2991       node = node_i.name
2992       nimg = node_image[node]
2993
2994       if node_i.offline:
2995         if verbose:
2996           feedback_fn("* Skipping offline node %s" % (node,))
2997         n_offline += 1
2998         continue
2999
3000       if node == master_node:
3001         ntype = "master"
3002       elif node_i.master_candidate:
3003         ntype = "master candidate"
3004       elif node_i.drained:
3005         ntype = "drained"
3006         n_drained += 1
3007       else:
3008         ntype = "regular"
3009       if verbose:
3010         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3011
3012       msg = all_nvinfo[node].fail_msg
3013       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3014                msg)
3015       if msg:
3016         nimg.rpc_fail = True
3017         continue
3018
3019       nresult = all_nvinfo[node].payload
3020
3021       nimg.call_ok = self._VerifyNode(node_i, nresult)
3022       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3023       self._VerifyNodeNetwork(node_i, nresult)
3024       self._VerifyNodeUserScripts(node_i, nresult)
3025       self._VerifyOob(node_i, nresult)
3026
3027       if nimg.vm_capable:
3028         self._VerifyNodeLVM(node_i, nresult, vg_name)
3029         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3030                              all_drbd_map)
3031
3032         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3033         self._UpdateNodeInstances(node_i, nresult, nimg)
3034         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3035         self._UpdateNodeOS(node_i, nresult, nimg)
3036
3037         if not nimg.os_fail:
3038           if refos_img is None:
3039             refos_img = nimg
3040           self._VerifyNodeOS(node_i, nimg, refos_img)
3041         self._VerifyNodeBridges(node_i, nresult, bridges)
3042
3043         # Check whether all running instancies are primary for the node. (This
3044         # can no longer be done from _VerifyInstance below, since some of the
3045         # wrong instances could be from other node groups.)
3046         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3047
3048         for inst in non_primary_inst:
3049           # FIXME: investigate best way to handle offline insts
3050           if inst.admin_state == constants.ADMINST_OFFLINE:
3051             if verbose:
3052               feedback_fn("* Skipping offline instance %s" % inst.name)
3053             i_offline += 1
3054             continue
3055           test = inst in self.all_inst_info
3056           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3057                    "instance should not run on node %s", node_i.name)
3058           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3059                    "node is running unknown instance %s", inst)
3060
3061     for node, result in extra_lv_nvinfo.items():
3062       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3063                               node_image[node], vg_name)
3064
3065     feedback_fn("* Verifying instance status")
3066     for instance in self.my_inst_names:
3067       if verbose:
3068         feedback_fn("* Verifying instance %s" % instance)
3069       inst_config = self.my_inst_info[instance]
3070       self._VerifyInstance(instance, inst_config, node_image,
3071                            instdisk[instance])
3072       inst_nodes_offline = []
3073
3074       pnode = inst_config.primary_node
3075       pnode_img = node_image[pnode]
3076       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3077                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3078                " primary node failed", instance)
3079
3080       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3081                pnode_img.offline,
3082                constants.CV_EINSTANCEBADNODE, instance,
3083                "instance is marked as running and lives on offline node %s",
3084                inst_config.primary_node)
3085
3086       # If the instance is non-redundant we cannot survive losing its primary
3087       # node, so we are not N+1 compliant. On the other hand we have no disk
3088       # templates with more than one secondary so that situation is not well
3089       # supported either.
3090       # FIXME: does not support file-backed instances
3091       if not inst_config.secondary_nodes:
3092         i_non_redundant.append(instance)
3093
3094       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3095                constants.CV_EINSTANCELAYOUT,
3096                instance, "instance has multiple secondary nodes: %s",
3097                utils.CommaJoin(inst_config.secondary_nodes),
3098                code=self.ETYPE_WARNING)
3099
3100       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3101         pnode = inst_config.primary_node
3102         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3103         instance_groups = {}
3104
3105         for node in instance_nodes:
3106           instance_groups.setdefault(self.all_node_info[node].group,
3107                                      []).append(node)
3108
3109         pretty_list = [
3110           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3111           # Sort so that we always list the primary node first.
3112           for group, nodes in sorted(instance_groups.items(),
3113                                      key=lambda (_, nodes): pnode in nodes,
3114                                      reverse=True)]
3115
3116         self._ErrorIf(len(instance_groups) > 1,
3117                       constants.CV_EINSTANCESPLITGROUPS,
3118                       instance, "instance has primary and secondary nodes in"
3119                       " different groups: %s", utils.CommaJoin(pretty_list),
3120                       code=self.ETYPE_WARNING)
3121
3122       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3123         i_non_a_balanced.append(instance)
3124
3125       for snode in inst_config.secondary_nodes:
3126         s_img = node_image[snode]
3127         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3128                  snode, "instance %s, connection to secondary node failed",
3129                  instance)
3130
3131         if s_img.offline:
3132           inst_nodes_offline.append(snode)
3133
3134       # warn that the instance lives on offline nodes
3135       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3136                "instance has offline secondary node(s) %s",
3137                utils.CommaJoin(inst_nodes_offline))
3138       # ... or ghost/non-vm_capable nodes
3139       for node in inst_config.all_nodes:
3140         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3141                  instance, "instance lives on ghost node %s", node)
3142         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3143                  instance, "instance lives on non-vm_capable node %s", node)
3144
3145     feedback_fn("* Verifying orphan volumes")
3146     reserved = utils.FieldSet(*cluster.reserved_lvs)
3147
3148     # We will get spurious "unknown volume" warnings if any node of this group
3149     # is secondary for an instance whose primary is in another group. To avoid
3150     # them, we find these instances and add their volumes to node_vol_should.
3151     for inst in self.all_inst_info.values():
3152       for secondary in inst.secondary_nodes:
3153         if (secondary in self.my_node_info
3154             and inst.name not in self.my_inst_info):
3155           inst.MapLVsByNode(node_vol_should)
3156           break
3157
3158     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3159
3160     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3161       feedback_fn("* Verifying N+1 Memory redundancy")
3162       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3163
3164     feedback_fn("* Other Notes")
3165     if i_non_redundant:
3166       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3167                   % len(i_non_redundant))
3168
3169     if i_non_a_balanced:
3170       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3171                   % len(i_non_a_balanced))
3172
3173     if i_offline:
3174       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3175
3176     if n_offline:
3177       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3178
3179     if n_drained:
3180       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3181
3182     return not self.bad
3183
3184   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3185     """Analyze the post-hooks' result
3186
3187     This method analyses the hook result, handles it, and sends some
3188     nicely-formatted feedback back to the user.
3189
3190     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3191         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3192     @param hooks_results: the results of the multi-node hooks rpc call
3193     @param feedback_fn: function used send feedback back to the caller
3194     @param lu_result: previous Exec result
3195     @return: the new Exec result, based on the previous result
3196         and hook results
3197
3198     """
3199     # We only really run POST phase hooks, only for non-empty groups,
3200     # and are only interested in their results
3201     if not self.my_node_names:
3202       # empty node group
3203       pass
3204     elif phase == constants.HOOKS_PHASE_POST:
3205       # Used to change hooks' output to proper indentation
3206       feedback_fn("* Hooks Results")
3207       assert hooks_results, "invalid result from hooks"
3208
3209       for node_name in hooks_results:
3210         res = hooks_results[node_name]
3211         msg = res.fail_msg
3212         test = msg and not res.offline
3213         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3214                       "Communication failure in hooks execution: %s", msg)
3215         if res.offline or msg:
3216           # No need to investigate payload if node is offline or gave
3217           # an error.
3218           continue
3219         for script, hkr, output in res.payload:
3220           test = hkr == constants.HKR_FAIL
3221           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3222                         "Script %s failed, output:", script)
3223           if test:
3224             output = self._HOOKS_INDENT_RE.sub("      ", output)
3225             feedback_fn("%s" % output)
3226             lu_result = False
3227
3228     return lu_result
3229
3230
3231 class LUClusterVerifyDisks(NoHooksLU):
3232   """Verifies the cluster disks status.
3233
3234   """
3235   REQ_BGL = False
3236
3237   def ExpandNames(self):
3238     self.share_locks = _ShareAll()
3239     self.needed_locks = {
3240       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3241       }
3242
3243   def Exec(self, feedback_fn):
3244     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3245
3246     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3247     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3248                            for group in group_names])
3249
3250
3251 class LUGroupVerifyDisks(NoHooksLU):
3252   """Verifies the status of all disks in a node group.
3253
3254   """
3255   REQ_BGL = False
3256
3257   def ExpandNames(self):
3258     # Raises errors.OpPrereqError on its own if group can't be found
3259     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3260
3261     self.share_locks = _ShareAll()
3262     self.needed_locks = {
3263       locking.LEVEL_INSTANCE: [],
3264       locking.LEVEL_NODEGROUP: [],
3265       locking.LEVEL_NODE: [],
3266       }
3267
3268   def DeclareLocks(self, level):
3269     if level == locking.LEVEL_INSTANCE:
3270       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3271
3272       # Lock instances optimistically, needs verification once node and group
3273       # locks have been acquired
3274       self.needed_locks[locking.LEVEL_INSTANCE] = \
3275         self.cfg.GetNodeGroupInstances(self.group_uuid)
3276
3277     elif level == locking.LEVEL_NODEGROUP:
3278       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3279
3280       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3281         set([self.group_uuid] +
3282             # Lock all groups used by instances optimistically; this requires
3283             # going via the node before it's locked, requiring verification
3284             # later on
3285             [group_uuid
3286              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3287              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3288
3289     elif level == locking.LEVEL_NODE:
3290       # This will only lock the nodes in the group to be verified which contain
3291       # actual instances
3292       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3293       self._LockInstancesNodes()
3294
3295       # Lock all nodes in group to be verified
3296       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3297       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3298       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3299
3300   def CheckPrereq(self):
3301     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3302     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3303     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3304
3305     assert self.group_uuid in owned_groups
3306
3307     # Check if locked instances are still correct
3308     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3309
3310     # Get instance information
3311     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3312
3313     # Check if node groups for locked instances are still correct
3314     for (instance_name, inst) in self.instances.items():
3315       assert owned_nodes.issuperset(inst.all_nodes), \
3316         "Instance %s's nodes changed while we kept the lock" % instance_name
3317
3318       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3319                                              owned_groups)
3320
3321       assert self.group_uuid in inst_groups, \
3322         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3323
3324   def Exec(self, feedback_fn):
3325     """Verify integrity of cluster disks.
3326
3327     @rtype: tuple of three items
3328     @return: a tuple of (dict of node-to-node_error, list of instances
3329         which need activate-disks, dict of instance: (node, volume) for
3330         missing volumes
3331
3332     """
3333     res_nodes = {}
3334     res_instances = set()
3335     res_missing = {}
3336
3337     nv_dict = _MapInstanceDisksToNodes([inst
3338             for inst in self.instances.values()
3339             if inst.admin_state == constants.ADMINST_UP])
3340
3341     if nv_dict:
3342       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3343                              set(self.cfg.GetVmCapableNodeList()))
3344
3345       node_lvs = self.rpc.call_lv_list(nodes, [])
3346
3347       for (node, node_res) in node_lvs.items():
3348         if node_res.offline:
3349           continue
3350
3351         msg = node_res.fail_msg
3352         if msg:
3353           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3354           res_nodes[node] = msg
3355           continue
3356
3357         for lv_name, (_, _, lv_online) in node_res.payload.items():
3358           inst = nv_dict.pop((node, lv_name), None)
3359           if not (lv_online or inst is None):
3360             res_instances.add(inst)
3361
3362       # any leftover items in nv_dict are missing LVs, let's arrange the data
3363       # better
3364       for key, inst in nv_dict.iteritems():
3365         res_missing.setdefault(inst, []).append(list(key))
3366
3367     return (res_nodes, list(res_instances), res_missing)
3368
3369
3370 class LUClusterRepairDiskSizes(NoHooksLU):
3371   """Verifies the cluster disks sizes.
3372
3373   """
3374   REQ_BGL = False
3375
3376   def ExpandNames(self):
3377     if self.op.instances:
3378       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3379       self.needed_locks = {
3380         locking.LEVEL_NODE_RES: [],
3381         locking.LEVEL_INSTANCE: self.wanted_names,
3382         }
3383       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3384     else:
3385       self.wanted_names = None
3386       self.needed_locks = {
3387         locking.LEVEL_NODE_RES: locking.ALL_SET,
3388         locking.LEVEL_INSTANCE: locking.ALL_SET,
3389         }
3390     self.share_locks = {
3391       locking.LEVEL_NODE_RES: 1,
3392       locking.LEVEL_INSTANCE: 0,
3393       }
3394
3395   def DeclareLocks(self, level):
3396     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3397       self._LockInstancesNodes(primary_only=True, level=level)
3398
3399   def CheckPrereq(self):
3400     """Check prerequisites.
3401
3402     This only checks the optional instance list against the existing names.
3403
3404     """
3405     if self.wanted_names is None:
3406       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3407
3408     self.wanted_instances = \
3409         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3410
3411   def _EnsureChildSizes(self, disk):
3412     """Ensure children of the disk have the needed disk size.
3413
3414     This is valid mainly for DRBD8 and fixes an issue where the
3415     children have smaller disk size.
3416
3417     @param disk: an L{ganeti.objects.Disk} object
3418
3419     """
3420     if disk.dev_type == constants.LD_DRBD8:
3421       assert disk.children, "Empty children for DRBD8?"
3422       fchild = disk.children[0]
3423       mismatch = fchild.size < disk.size
3424       if mismatch:
3425         self.LogInfo("Child disk has size %d, parent %d, fixing",
3426                      fchild.size, disk.size)
3427         fchild.size = disk.size
3428
3429       # and we recurse on this child only, not on the metadev
3430       return self._EnsureChildSizes(fchild) or mismatch
3431     else:
3432       return False
3433
3434   def Exec(self, feedback_fn):
3435     """Verify the size of cluster disks.
3436
3437     """
3438     # TODO: check child disks too
3439     # TODO: check differences in size between primary/secondary nodes
3440     per_node_disks = {}
3441     for instance in self.wanted_instances:
3442       pnode = instance.primary_node
3443       if pnode not in per_node_disks:
3444         per_node_disks[pnode] = []
3445       for idx, disk in enumerate(instance.disks):
3446         per_node_disks[pnode].append((instance, idx, disk))
3447
3448     assert not (frozenset(per_node_disks.keys()) -
3449                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3450       "Not owning correct locks"
3451     assert not self.owned_locks(locking.LEVEL_NODE)
3452
3453     changed = []
3454     for node, dskl in per_node_disks.items():
3455       newl = [v[2].Copy() for v in dskl]
3456       for dsk in newl:
3457         self.cfg.SetDiskID(dsk, node)
3458       result = self.rpc.call_blockdev_getsize(node, newl)
3459       if result.fail_msg:
3460         self.LogWarning("Failure in blockdev_getsize call to node"
3461                         " %s, ignoring", node)
3462         continue
3463       if len(result.payload) != len(dskl):
3464         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3465                         " result.payload=%s", node, len(dskl), result.payload)
3466         self.LogWarning("Invalid result from node %s, ignoring node results",
3467                         node)
3468         continue
3469       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3470         if size is None:
3471           self.LogWarning("Disk %d of instance %s did not return size"
3472                           " information, ignoring", idx, instance.name)
3473           continue
3474         if not isinstance(size, (int, long)):
3475           self.LogWarning("Disk %d of instance %s did not return valid"
3476                           " size information, ignoring", idx, instance.name)
3477           continue
3478         size = size >> 20
3479         if size != disk.size:
3480           self.LogInfo("Disk %d of instance %s has mismatched size,"
3481                        " correcting: recorded %d, actual %d", idx,
3482                        instance.name, disk.size, size)
3483           disk.size = size
3484           self.cfg.Update(instance, feedback_fn)
3485           changed.append((instance.name, idx, size))
3486         if self._EnsureChildSizes(disk):
3487           self.cfg.Update(instance, feedback_fn)
3488           changed.append((instance.name, idx, disk.size))
3489     return changed
3490
3491
3492 class LUClusterRename(LogicalUnit):
3493   """Rename the cluster.
3494
3495   """
3496   HPATH = "cluster-rename"
3497   HTYPE = constants.HTYPE_CLUSTER
3498
3499   def BuildHooksEnv(self):
3500     """Build hooks env.
3501
3502     """
3503     return {
3504       "OP_TARGET": self.cfg.GetClusterName(),
3505       "NEW_NAME": self.op.name,
3506       }
3507
3508   def BuildHooksNodes(self):
3509     """Build hooks nodes.
3510
3511     """
3512     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3513
3514   def CheckPrereq(self):
3515     """Verify that the passed name is a valid one.
3516
3517     """
3518     hostname = netutils.GetHostname(name=self.op.name,
3519                                     family=self.cfg.GetPrimaryIPFamily())
3520
3521     new_name = hostname.name
3522     self.ip = new_ip = hostname.ip
3523     old_name = self.cfg.GetClusterName()
3524     old_ip = self.cfg.GetMasterIP()
3525     if new_name == old_name and new_ip == old_ip:
3526       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3527                                  " cluster has changed",
3528                                  errors.ECODE_INVAL)
3529     if new_ip != old_ip:
3530       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3531         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3532                                    " reachable on the network" %
3533                                    new_ip, errors.ECODE_NOTUNIQUE)
3534
3535     self.op.name = new_name
3536
3537   def Exec(self, feedback_fn):
3538     """Rename the cluster.
3539
3540     """
3541     clustername = self.op.name
3542     new_ip = self.ip
3543
3544     # shutdown the master IP
3545     master_params = self.cfg.GetMasterNetworkParameters()
3546     ems = self.cfg.GetUseExternalMipScript()
3547     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3548                                                      master_params, ems)
3549     result.Raise("Could not disable the master role")
3550
3551     try:
3552       cluster = self.cfg.GetClusterInfo()
3553       cluster.cluster_name = clustername
3554       cluster.master_ip = new_ip
3555       self.cfg.Update(cluster, feedback_fn)
3556
3557       # update the known hosts file
3558       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3559       node_list = self.cfg.GetOnlineNodeList()
3560       try:
3561         node_list.remove(master_params.name)
3562       except ValueError:
3563         pass
3564       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3565     finally:
3566       master_params.ip = new_ip
3567       result = self.rpc.call_node_activate_master_ip(master_params.name,
3568                                                      master_params, ems)
3569       msg = result.fail_msg
3570       if msg:
3571         self.LogWarning("Could not re-enable the master role on"
3572                         " the master, please restart manually: %s", msg)
3573
3574     return clustername
3575
3576
3577 def _ValidateNetmask(cfg, netmask):
3578   """Checks if a netmask is valid.
3579
3580   @type cfg: L{config.ConfigWriter}
3581   @param cfg: The cluster configuration
3582   @type netmask: int
3583   @param netmask: the netmask to be verified
3584   @raise errors.OpPrereqError: if the validation fails
3585
3586   """
3587   ip_family = cfg.GetPrimaryIPFamily()
3588   try:
3589     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3590   except errors.ProgrammerError:
3591     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3592                                ip_family)
3593   if not ipcls.ValidateNetmask(netmask):
3594     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3595                                 (netmask))
3596
3597
3598 class LUClusterSetParams(LogicalUnit):
3599   """Change the parameters of the cluster.
3600
3601   """
3602   HPATH = "cluster-modify"
3603   HTYPE = constants.HTYPE_CLUSTER
3604   REQ_BGL = False
3605
3606   def CheckArguments(self):
3607     """Check parameters
3608
3609     """
3610     if self.op.uid_pool:
3611       uidpool.CheckUidPool(self.op.uid_pool)
3612
3613     if self.op.add_uids:
3614       uidpool.CheckUidPool(self.op.add_uids)
3615
3616     if self.op.remove_uids:
3617       uidpool.CheckUidPool(self.op.remove_uids)
3618
3619     if self.op.master_netmask is not None:
3620       _ValidateNetmask(self.cfg, self.op.master_netmask)
3621
3622     if self.op.diskparams:
3623       for dt_params in self.op.diskparams.values():
3624         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3625
3626   def ExpandNames(self):
3627     # FIXME: in the future maybe other cluster params won't require checking on
3628     # all nodes to be modified.
3629     self.needed_locks = {
3630       locking.LEVEL_NODE: locking.ALL_SET,
3631     }
3632     self.share_locks[locking.LEVEL_NODE] = 1
3633
3634   def BuildHooksEnv(self):
3635     """Build hooks env.
3636
3637     """
3638     return {
3639       "OP_TARGET": self.cfg.GetClusterName(),
3640       "NEW_VG_NAME": self.op.vg_name,
3641       }
3642
3643   def BuildHooksNodes(self):
3644     """Build hooks nodes.
3645
3646     """
3647     mn = self.cfg.GetMasterNode()
3648     return ([mn], [mn])
3649
3650   def CheckPrereq(self):
3651     """Check prerequisites.
3652
3653     This checks whether the given params don't conflict and
3654     if the given volume group is valid.
3655
3656     """
3657     if self.op.vg_name is not None and not self.op.vg_name:
3658       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3659         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3660                                    " instances exist", errors.ECODE_INVAL)
3661
3662     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3663       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3664         raise errors.OpPrereqError("Cannot disable drbd helper while"
3665                                    " drbd-based instances exist",
3666                                    errors.ECODE_INVAL)
3667
3668     node_list = self.owned_locks(locking.LEVEL_NODE)
3669
3670     # if vg_name not None, checks given volume group on all nodes
3671     if self.op.vg_name:
3672       vglist = self.rpc.call_vg_list(node_list)
3673       for node in node_list:
3674         msg = vglist[node].fail_msg
3675         if msg:
3676           # ignoring down node
3677           self.LogWarning("Error while gathering data on node %s"
3678                           " (ignoring node): %s", node, msg)
3679           continue
3680         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3681                                               self.op.vg_name,
3682                                               constants.MIN_VG_SIZE)
3683         if vgstatus:
3684           raise errors.OpPrereqError("Error on node '%s': %s" %
3685                                      (node, vgstatus), errors.ECODE_ENVIRON)
3686
3687     if self.op.drbd_helper:
3688       # checks given drbd helper on all nodes
3689       helpers = self.rpc.call_drbd_helper(node_list)
3690       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3691         if ninfo.offline:
3692           self.LogInfo("Not checking drbd helper on offline node %s", node)
3693           continue
3694         msg = helpers[node].fail_msg
3695         if msg:
3696           raise errors.OpPrereqError("Error checking drbd helper on node"
3697                                      " '%s': %s" % (node, msg),
3698                                      errors.ECODE_ENVIRON)
3699         node_helper = helpers[node].payload
3700         if node_helper != self.op.drbd_helper:
3701           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3702                                      (node, node_helper), errors.ECODE_ENVIRON)
3703
3704     self.cluster = cluster = self.cfg.GetClusterInfo()
3705     # validate params changes
3706     if self.op.beparams:
3707       objects.UpgradeBeParams(self.op.beparams)
3708       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3709       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3710
3711     if self.op.ndparams:
3712       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3713       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3714
3715       # TODO: we need a more general way to handle resetting
3716       # cluster-level parameters to default values
3717       if self.new_ndparams["oob_program"] == "":
3718         self.new_ndparams["oob_program"] = \
3719             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3720
3721     if self.op.hv_state:
3722       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3723                                             self.cluster.hv_state_static)
3724       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3725                                for hv, values in new_hv_state.items())
3726
3727     if self.op.disk_state:
3728       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3729                                                 self.cluster.disk_state_static)
3730       self.new_disk_state = \
3731         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3732                             for name, values in svalues.items()))
3733              for storage, svalues in new_disk_state.items())
3734
3735     if self.op.ipolicy:
3736       ipolicy = {}
3737       for key, value in self.op.ipolicy.items():
3738         utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3739         ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3740                                           value)
3741       objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3742       self.new_ipolicy = ipolicy
3743
3744     if self.op.nicparams:
3745       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3746       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3747       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3748       nic_errors = []
3749
3750       # check all instances for consistency
3751       for instance in self.cfg.GetAllInstancesInfo().values():
3752         for nic_idx, nic in enumerate(instance.nics):
3753           params_copy = copy.deepcopy(nic.nicparams)
3754           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3755
3756           # check parameter syntax
3757           try:
3758             objects.NIC.CheckParameterSyntax(params_filled)
3759           except errors.ConfigurationError, err:
3760             nic_errors.append("Instance %s, nic/%d: %s" %
3761                               (instance.name, nic_idx, err))
3762
3763           # if we're moving instances to routed, check that they have an ip
3764           target_mode = params_filled[constants.NIC_MODE]
3765           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3766             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3767                               " address" % (instance.name, nic_idx))
3768       if nic_errors:
3769         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3770                                    "\n".join(nic_errors))
3771
3772     # hypervisor list/parameters
3773     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3774     if self.op.hvparams:
3775       for hv_name, hv_dict in self.op.hvparams.items():
3776         if hv_name not in self.new_hvparams:
3777           self.new_hvparams[hv_name] = hv_dict
3778         else:
3779           self.new_hvparams[hv_name].update(hv_dict)
3780
3781     # disk template parameters
3782     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3783     if self.op.diskparams:
3784       for dt_name, dt_params in self.op.diskparams.items():
3785         if dt_name not in self.op.diskparams:
3786           self.new_diskparams[dt_name] = dt_params
3787         else:
3788           self.new_diskparams[dt_name].update(dt_params)
3789
3790     # os hypervisor parameters
3791     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3792     if self.op.os_hvp:
3793       for os_name, hvs in self.op.os_hvp.items():
3794         if os_name not in self.new_os_hvp:
3795           self.new_os_hvp[os_name] = hvs
3796         else:
3797           for hv_name, hv_dict in hvs.items():
3798             if hv_name not in self.new_os_hvp[os_name]:
3799               self.new_os_hvp[os_name][hv_name] = hv_dict
3800             else:
3801               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3802
3803     # os parameters
3804     self.new_osp = objects.FillDict(cluster.osparams, {})
3805     if self.op.osparams:
3806       for os_name, osp in self.op.osparams.items():
3807         if os_name not in self.new_osp:
3808           self.new_osp[os_name] = {}
3809
3810         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3811                                                   use_none=True)
3812
3813         if not self.new_osp[os_name]:
3814           # we removed all parameters
3815           del self.new_osp[os_name]
3816         else:
3817           # check the parameter validity (remote check)
3818           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3819                          os_name, self.new_osp[os_name])
3820
3821     # changes to the hypervisor list
3822     if self.op.enabled_hypervisors is not None:
3823       self.hv_list = self.op.enabled_hypervisors
3824       for hv in self.hv_list:
3825         # if the hypervisor doesn't already exist in the cluster
3826         # hvparams, we initialize it to empty, and then (in both
3827         # cases) we make sure to fill the defaults, as we might not
3828         # have a complete defaults list if the hypervisor wasn't
3829         # enabled before
3830         if hv not in new_hvp:
3831           new_hvp[hv] = {}
3832         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3833         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3834     else:
3835       self.hv_list = cluster.enabled_hypervisors
3836
3837     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3838       # either the enabled list has changed, or the parameters have, validate
3839       for hv_name, hv_params in self.new_hvparams.items():
3840         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3841             (self.op.enabled_hypervisors and
3842              hv_name in self.op.enabled_hypervisors)):
3843           # either this is a new hypervisor, or its parameters have changed
3844           hv_class = hypervisor.GetHypervisor(hv_name)
3845           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3846           hv_class.CheckParameterSyntax(hv_params)
3847           _CheckHVParams(self, node_list, hv_name, hv_params)
3848
3849     if self.op.os_hvp:
3850       # no need to check any newly-enabled hypervisors, since the
3851       # defaults have already been checked in the above code-block
3852       for os_name, os_hvp in self.new_os_hvp.items():
3853         for hv_name, hv_params in os_hvp.items():
3854           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3855           # we need to fill in the new os_hvp on top of the actual hv_p
3856           cluster_defaults = self.new_hvparams.get(hv_name, {})
3857           new_osp = objects.FillDict(cluster_defaults, hv_params)
3858           hv_class = hypervisor.GetHypervisor(hv_name)
3859           hv_class.CheckParameterSyntax(new_osp)
3860           _CheckHVParams(self, node_list, hv_name, new_osp)
3861
3862     if self.op.default_iallocator:
3863       alloc_script = utils.FindFile(self.op.default_iallocator,
3864                                     constants.IALLOCATOR_SEARCH_PATH,
3865                                     os.path.isfile)
3866       if alloc_script is None:
3867         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3868                                    " specified" % self.op.default_iallocator,
3869                                    errors.ECODE_INVAL)
3870
3871   def Exec(self, feedback_fn):
3872     """Change the parameters of the cluster.
3873
3874     """
3875     if self.op.vg_name is not None:
3876       new_volume = self.op.vg_name
3877       if not new_volume:
3878         new_volume = None
3879       if new_volume != self.cfg.GetVGName():
3880         self.cfg.SetVGName(new_volume)
3881       else:
3882         feedback_fn("Cluster LVM configuration already in desired"
3883                     " state, not changing")
3884     if self.op.drbd_helper is not None:
3885       new_helper = self.op.drbd_helper
3886       if not new_helper:
3887         new_helper = None
3888       if new_helper != self.cfg.GetDRBDHelper():
3889         self.cfg.SetDRBDHelper(new_helper)
3890       else:
3891         feedback_fn("Cluster DRBD helper already in desired state,"
3892                     " not changing")
3893     if self.op.hvparams:
3894       self.cluster.hvparams = self.new_hvparams
3895     if self.op.os_hvp:
3896       self.cluster.os_hvp = self.new_os_hvp
3897     if self.op.enabled_hypervisors is not None:
3898       self.cluster.hvparams = self.new_hvparams
3899       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3900     if self.op.beparams:
3901       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3902     if self.op.nicparams:
3903       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3904     if self.op.ipolicy:
3905       self.cluster.ipolicy = self.new_ipolicy
3906     if self.op.osparams:
3907       self.cluster.osparams = self.new_osp
3908     if self.op.ndparams:
3909       self.cluster.ndparams = self.new_ndparams
3910     if self.op.diskparams:
3911       self.cluster.diskparams = self.new_diskparams
3912     if self.op.hv_state:
3913       self.cluster.hv_state_static = self.new_hv_state
3914     if self.op.disk_state:
3915       self.cluster.disk_state_static = self.new_disk_state
3916
3917     if self.op.candidate_pool_size is not None:
3918       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3919       # we need to update the pool size here, otherwise the save will fail
3920       _AdjustCandidatePool(self, [])
3921
3922     if self.op.maintain_node_health is not None:
3923       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3924         feedback_fn("Note: CONFD was disabled at build time, node health"
3925                     " maintenance is not useful (still enabling it)")
3926       self.cluster.maintain_node_health = self.op.maintain_node_health
3927
3928     if self.op.prealloc_wipe_disks is not None:
3929       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3930
3931     if self.op.add_uids is not None:
3932       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3933
3934     if self.op.remove_uids is not None:
3935       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3936
3937     if self.op.uid_pool is not None:
3938       self.cluster.uid_pool = self.op.uid_pool
3939
3940     if self.op.default_iallocator is not None:
3941       self.cluster.default_iallocator = self.op.default_iallocator
3942
3943     if self.op.reserved_lvs is not None:
3944       self.cluster.reserved_lvs = self.op.reserved_lvs
3945
3946     if self.op.use_external_mip_script is not None:
3947       self.cluster.use_external_mip_script = self.op.use_external_mip_script
3948
3949     def helper_os(aname, mods, desc):
3950       desc += " OS list"
3951       lst = getattr(self.cluster, aname)
3952       for key, val in mods:
3953         if key == constants.DDM_ADD:
3954           if val in lst:
3955             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3956           else:
3957             lst.append(val)
3958         elif key == constants.DDM_REMOVE:
3959           if val in lst:
3960             lst.remove(val)
3961           else:
3962             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3963         else:
3964           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3965
3966     if self.op.hidden_os:
3967       helper_os("hidden_os", self.op.hidden_os, "hidden")
3968
3969     if self.op.blacklisted_os:
3970       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3971
3972     if self.op.master_netdev:
3973       master_params = self.cfg.GetMasterNetworkParameters()
3974       ems = self.cfg.GetUseExternalMipScript()
3975       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3976                   self.cluster.master_netdev)
3977       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3978                                                        master_params, ems)
3979       result.Raise("Could not disable the master ip")
3980       feedback_fn("Changing master_netdev from %s to %s" %
3981                   (master_params.netdev, self.op.master_netdev))
3982       self.cluster.master_netdev = self.op.master_netdev
3983
3984     if self.op.master_netmask:
3985       master_params = self.cfg.GetMasterNetworkParameters()
3986       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3987       result = self.rpc.call_node_change_master_netmask(master_params.name,
3988                                                         master_params.netmask,
3989                                                         self.op.master_netmask,
3990                                                         master_params.ip,
3991                                                         master_params.netdev)
3992       if result.fail_msg:
3993         msg = "Could not change the master IP netmask: %s" % result.fail_msg
3994         feedback_fn(msg)
3995
3996       self.cluster.master_netmask = self.op.master_netmask
3997
3998     self.cfg.Update(self.cluster, feedback_fn)
3999
4000     if self.op.master_netdev:
4001       master_params = self.cfg.GetMasterNetworkParameters()
4002       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4003                   self.op.master_netdev)
4004       ems = self.cfg.GetUseExternalMipScript()
4005       result = self.rpc.call_node_activate_master_ip(master_params.name,
4006                                                      master_params, ems)
4007       if result.fail_msg:
4008         self.LogWarning("Could not re-enable the master ip on"
4009                         " the master, please restart manually: %s",
4010                         result.fail_msg)
4011
4012
4013 def _UploadHelper(lu, nodes, fname):
4014   """Helper for uploading a file and showing warnings.
4015
4016   """
4017   if os.path.exists(fname):
4018     result = lu.rpc.call_upload_file(nodes, fname)
4019     for to_node, to_result in result.items():
4020       msg = to_result.fail_msg
4021       if msg:
4022         msg = ("Copy of file %s to node %s failed: %s" %
4023                (fname, to_node, msg))
4024         lu.proc.LogWarning(msg)
4025
4026
4027 def _ComputeAncillaryFiles(cluster, redist):
4028   """Compute files external to Ganeti which need to be consistent.
4029
4030   @type redist: boolean
4031   @param redist: Whether to include files which need to be redistributed
4032
4033   """
4034   # Compute files for all nodes
4035   files_all = set([
4036     constants.SSH_KNOWN_HOSTS_FILE,
4037     constants.CONFD_HMAC_KEY,
4038     constants.CLUSTER_DOMAIN_SECRET_FILE,
4039     constants.SPICE_CERT_FILE,
4040     constants.SPICE_CACERT_FILE,
4041     constants.RAPI_USERS_FILE,
4042     ])
4043
4044   if not redist:
4045     files_all.update(constants.ALL_CERT_FILES)
4046     files_all.update(ssconf.SimpleStore().GetFileList())
4047   else:
4048     # we need to ship at least the RAPI certificate
4049     files_all.add(constants.RAPI_CERT_FILE)
4050
4051   if cluster.modify_etc_hosts:
4052     files_all.add(constants.ETC_HOSTS)
4053
4054   # Files which are optional, these must:
4055   # - be present in one other category as well
4056   # - either exist or not exist on all nodes of that category (mc, vm all)
4057   files_opt = set([
4058     constants.RAPI_USERS_FILE,
4059     ])
4060
4061   # Files which should only be on master candidates
4062   files_mc = set()
4063
4064   if not redist:
4065     files_mc.add(constants.CLUSTER_CONF_FILE)
4066
4067     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4068     # replication
4069     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4070
4071   # Files which should only be on VM-capable nodes
4072   files_vm = set(filename
4073     for hv_name in cluster.enabled_hypervisors
4074     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4075
4076   files_opt |= set(filename
4077     for hv_name in cluster.enabled_hypervisors
4078     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4079
4080   # Filenames in each category must be unique
4081   all_files_set = files_all | files_mc | files_vm
4082   assert (len(all_files_set) ==
4083           sum(map(len, [files_all, files_mc, files_vm]))), \
4084          "Found file listed in more than one file list"
4085
4086   # Optional files must be present in one other category
4087   assert all_files_set.issuperset(files_opt), \
4088          "Optional file not in a different required list"
4089
4090   return (files_all, files_opt, files_mc, files_vm)
4091
4092
4093 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4094   """Distribute additional files which are part of the cluster configuration.
4095
4096   ConfigWriter takes care of distributing the config and ssconf files, but
4097   there are more files which should be distributed to all nodes. This function
4098   makes sure those are copied.
4099
4100   @param lu: calling logical unit
4101   @param additional_nodes: list of nodes not in the config to distribute to
4102   @type additional_vm: boolean
4103   @param additional_vm: whether the additional nodes are vm-capable or not
4104
4105   """
4106   # Gather target nodes
4107   cluster = lu.cfg.GetClusterInfo()
4108   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4109
4110   online_nodes = lu.cfg.GetOnlineNodeList()
4111   vm_nodes = lu.cfg.GetVmCapableNodeList()
4112
4113   if additional_nodes is not None:
4114     online_nodes.extend(additional_nodes)
4115     if additional_vm:
4116       vm_nodes.extend(additional_nodes)
4117
4118   # Never distribute to master node
4119   for nodelist in [online_nodes, vm_nodes]:
4120     if master_info.name in nodelist:
4121       nodelist.remove(master_info.name)
4122
4123   # Gather file lists
4124   (files_all, _, files_mc, files_vm) = \
4125     _ComputeAncillaryFiles(cluster, True)
4126
4127   # Never re-distribute configuration file from here
4128   assert not (constants.CLUSTER_CONF_FILE in files_all or
4129               constants.CLUSTER_CONF_FILE in files_vm)
4130   assert not files_mc, "Master candidates not handled in this function"
4131
4132   filemap = [
4133     (online_nodes, files_all),
4134     (vm_nodes, files_vm),
4135     ]
4136
4137   # Upload the files
4138   for (node_list, files) in filemap:
4139     for fname in files:
4140       _UploadHelper(lu, node_list, fname)
4141
4142
4143 class LUClusterRedistConf(NoHooksLU):
4144   """Force the redistribution of cluster configuration.
4145
4146   This is a very simple LU.
4147
4148   """
4149   REQ_BGL = False
4150
4151   def ExpandNames(self):
4152     self.needed_locks = {
4153       locking.LEVEL_NODE: locking.ALL_SET,
4154     }
4155     self.share_locks[locking.LEVEL_NODE] = 1
4156
4157   def Exec(self, feedback_fn):
4158     """Redistribute the configuration.
4159
4160     """
4161     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4162     _RedistributeAncillaryFiles(self)
4163
4164
4165 class LUClusterActivateMasterIp(NoHooksLU):
4166   """Activate the master IP on the master node.
4167
4168   """
4169   def Exec(self, feedback_fn):
4170     """Activate the master IP.
4171
4172     """
4173     master_params = self.cfg.GetMasterNetworkParameters()
4174     ems = self.cfg.GetUseExternalMipScript()
4175     result = self.rpc.call_node_activate_master_ip(master_params.name,
4176                                                    master_params, ems)
4177     result.Raise("Could not activate the master IP")
4178
4179
4180 class LUClusterDeactivateMasterIp(NoHooksLU):
4181   """Deactivate the master IP on the master node.
4182
4183   """
4184   def Exec(self, feedback_fn):
4185     """Deactivate the master IP.
4186
4187     """
4188     master_params = self.cfg.GetMasterNetworkParameters()
4189     ems = self.cfg.GetUseExternalMipScript()
4190     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4191                                                      master_params, ems)
4192     result.Raise("Could not deactivate the master IP")
4193
4194
4195 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4196   """Sleep and poll for an instance's disk to sync.
4197
4198   """
4199   if not instance.disks or disks is not None and not disks:
4200     return True
4201
4202   disks = _ExpandCheckDisks(instance, disks)
4203
4204   if not oneshot:
4205     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4206
4207   node = instance.primary_node
4208
4209   for dev in disks:
4210     lu.cfg.SetDiskID(dev, node)
4211
4212   # TODO: Convert to utils.Retry
4213
4214   retries = 0
4215   degr_retries = 10 # in seconds, as we sleep 1 second each time
4216   while True:
4217     max_time = 0
4218     done = True
4219     cumul_degraded = False
4220     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4221     msg = rstats.fail_msg
4222     if msg:
4223       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4224       retries += 1
4225       if retries >= 10:
4226         raise errors.RemoteError("Can't contact node %s for mirror data,"
4227                                  " aborting." % node)
4228       time.sleep(6)
4229       continue
4230     rstats = rstats.payload
4231     retries = 0
4232     for i, mstat in enumerate(rstats):
4233       if mstat is None:
4234         lu.LogWarning("Can't compute data for node %s/%s",
4235                            node, disks[i].iv_name)
4236         continue
4237
4238       cumul_degraded = (cumul_degraded or
4239                         (mstat.is_degraded and mstat.sync_percent is None))
4240       if mstat.sync_percent is not None:
4241         done = False
4242         if mstat.estimated_time is not None:
4243           rem_time = ("%s remaining (estimated)" %
4244                       utils.FormatSeconds(mstat.estimated_time))
4245           max_time = mstat.estimated_time
4246         else:
4247           rem_time = "no time estimate"
4248         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4249                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4250
4251     # if we're done but degraded, let's do a few small retries, to
4252     # make sure we see a stable and not transient situation; therefore
4253     # we force restart of the loop
4254     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4255       logging.info("Degraded disks found, %d retries left", degr_retries)
4256       degr_retries -= 1
4257       time.sleep(1)
4258       continue
4259
4260     if done or oneshot:
4261       break
4262
4263     time.sleep(min(60, max_time))
4264
4265   if done:
4266     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4267   return not cumul_degraded
4268
4269
4270 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4271   """Check that mirrors are not degraded.
4272
4273   The ldisk parameter, if True, will change the test from the
4274   is_degraded attribute (which represents overall non-ok status for
4275   the device(s)) to the ldisk (representing the local storage status).
4276
4277   """
4278   lu.cfg.SetDiskID(dev, node)
4279
4280   result = True
4281
4282   if on_primary or dev.AssembleOnSecondary():
4283     rstats = lu.rpc.call_blockdev_find(node, dev)
4284     msg = rstats.fail_msg
4285     if msg:
4286       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4287       result = False
4288     elif not rstats.payload:
4289       lu.LogWarning("Can't find disk on node %s", node)
4290       result = False
4291     else:
4292       if ldisk:
4293         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4294       else:
4295         result = result and not rstats.payload.is_degraded
4296
4297   if dev.children:
4298     for child in dev.children:
4299       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4300
4301   return result
4302
4303
4304 class LUOobCommand(NoHooksLU):
4305   """Logical unit for OOB handling.
4306
4307   """
4308   REG_BGL = False
4309   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4310
4311   def ExpandNames(self):
4312     """Gather locks we need.
4313
4314     """
4315     if self.op.node_names:
4316       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4317       lock_names = self.op.node_names
4318     else:
4319       lock_names = locking.ALL_SET
4320
4321     self.needed_locks = {
4322       locking.LEVEL_NODE: lock_names,
4323       }
4324
4325   def CheckPrereq(self):
4326     """Check prerequisites.
4327
4328     This checks:
4329      - the node exists in the configuration
4330      - OOB is supported
4331
4332     Any errors are signaled by raising errors.OpPrereqError.
4333
4334     """
4335     self.nodes = []
4336     self.master_node = self.cfg.GetMasterNode()
4337
4338     assert self.op.power_delay >= 0.0
4339
4340     if self.op.node_names:
4341       if (self.op.command in self._SKIP_MASTER and
4342           self.master_node in self.op.node_names):
4343         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4344         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4345
4346         if master_oob_handler:
4347           additional_text = ("run '%s %s %s' if you want to operate on the"
4348                              " master regardless") % (master_oob_handler,
4349                                                       self.op.command,
4350                                                       self.master_node)
4351         else:
4352           additional_text = "it does not support out-of-band operations"
4353
4354         raise errors.OpPrereqError(("Operating on the master node %s is not"
4355                                     " allowed for %s; %s") %
4356                                    (self.master_node, self.op.command,
4357                                     additional_text), errors.ECODE_INVAL)
4358     else:
4359       self.op.node_names = self.cfg.GetNodeList()
4360       if self.op.command in self._SKIP_MASTER:
4361         self.op.node_names.remove(self.master_node)
4362
4363     if self.op.command in self._SKIP_MASTER:
4364       assert self.master_node not in self.op.node_names
4365
4366     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4367       if node is None:
4368         raise errors.OpPrereqError("Node %s not found" % node_name,
4369                                    errors.ECODE_NOENT)
4370       else:
4371         self.nodes.append(node)
4372
4373       if (not self.op.ignore_status and
4374           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4375         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4376                                     " not marked offline") % node_name,
4377                                    errors.ECODE_STATE)
4378
4379   def Exec(self, feedback_fn):
4380     """Execute OOB and return result if we expect any.
4381
4382     """
4383     master_node = self.master_node
4384     ret = []
4385
4386     for idx, node in enumerate(utils.NiceSort(self.nodes,
4387                                               key=lambda node: node.name)):
4388       node_entry = [(constants.RS_NORMAL, node.name)]
4389       ret.append(node_entry)
4390
4391       oob_program = _SupportsOob(self.cfg, node)
4392
4393       if not oob_program:
4394         node_entry.append((constants.RS_UNAVAIL, None))
4395         continue
4396
4397       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4398                    self.op.command, oob_program, node.name)
4399       result = self.rpc.call_run_oob(master_node, oob_program,
4400                                      self.op.command, node.name,
4401                                      self.op.timeout)
4402
4403       if result.fail_msg:
4404         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4405                         node.name, result.fail_msg)
4406         node_entry.append((constants.RS_NODATA, None))
4407       else:
4408         try:
4409           self._CheckPayload(result)
4410         except errors.OpExecError, err:
4411           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4412                           node.name, err)
4413           node_entry.append((constants.RS_NODATA, None))
4414         else:
4415           if self.op.command == constants.OOB_HEALTH:
4416             # For health we should log important events
4417             for item, status in result.payload:
4418               if status in [constants.OOB_STATUS_WARNING,
4419                             constants.OOB_STATUS_CRITICAL]:
4420                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4421                                 item, node.name, status)
4422
4423           if self.op.command == constants.OOB_POWER_ON:
4424             node.powered = True
4425           elif self.op.command == constants.OOB_POWER_OFF:
4426             node.powered = False
4427           elif self.op.command == constants.OOB_POWER_STATUS:
4428             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4429             if powered != node.powered:
4430               logging.warning(("Recorded power state (%s) of node '%s' does not"
4431                                " match actual power state (%s)"), node.powered,
4432                               node.name, powered)
4433
4434           # For configuration changing commands we should update the node
4435           if self.op.command in (constants.OOB_POWER_ON,
4436                                  constants.OOB_POWER_OFF):
4437             self.cfg.Update(node, feedback_fn)
4438
4439           node_entry.append((constants.RS_NORMAL, result.payload))
4440
4441           if (self.op.command == constants.OOB_POWER_ON and
4442               idx < len(self.nodes) - 1):
4443             time.sleep(self.op.power_delay)
4444
4445     return ret
4446
4447   def _CheckPayload(self, result):
4448     """Checks if the payload is valid.
4449
4450     @param result: RPC result
4451     @raises errors.OpExecError: If payload is not valid
4452
4453     """
4454     errs = []
4455     if self.op.command == constants.OOB_HEALTH:
4456       if not isinstance(result.payload, list):
4457         errs.append("command 'health' is expected to return a list but got %s" %
4458                     type(result.payload))
4459       else:
4460         for item, status in result.payload:
4461           if status not in constants.OOB_STATUSES:
4462             errs.append("health item '%s' has invalid status '%s'" %
4463                         (item, status))
4464
4465     if self.op.command == constants.OOB_POWER_STATUS:
4466       if not isinstance(result.payload, dict):
4467         errs.append("power-status is expected to return a dict but got %s" %
4468                     type(result.payload))
4469
4470     if self.op.command in [
4471         constants.OOB_POWER_ON,
4472         constants.OOB_POWER_OFF,
4473         constants.OOB_POWER_CYCLE,
4474         ]:
4475       if result.payload is not None:
4476         errs.append("%s is expected to not return payload but got '%s'" %
4477                     (self.op.command, result.payload))
4478
4479     if errs:
4480       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4481                                utils.CommaJoin(errs))
4482
4483
4484 class _OsQuery(_QueryBase):
4485   FIELDS = query.OS_FIELDS
4486
4487   def ExpandNames(self, lu):
4488     # Lock all nodes in shared mode
4489     # Temporary removal of locks, should be reverted later
4490     # TODO: reintroduce locks when they are lighter-weight
4491     lu.needed_locks = {}
4492     #self.share_locks[locking.LEVEL_NODE] = 1
4493     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4494
4495     # The following variables interact with _QueryBase._GetNames
4496     if self.names:
4497       self.wanted = self.names
4498     else:
4499       self.wanted = locking.ALL_SET
4500
4501     self.do_locking = self.use_locking
4502
4503   def DeclareLocks(self, lu, level):
4504     pass
4505
4506   @staticmethod
4507   def _DiagnoseByOS(rlist):
4508     """Remaps a per-node return list into an a per-os per-node dictionary
4509
4510     @param rlist: a map with node names as keys and OS objects as values
4511
4512     @rtype: dict
4513     @return: a dictionary with osnames as keys and as value another
4514         map, with nodes as keys and tuples of (path, status, diagnose,
4515         variants, parameters, api_versions) as values, eg::
4516
4517           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4518                                      (/srv/..., False, "invalid api")],
4519                            "node2": [(/srv/..., True, "", [], [])]}
4520           }
4521
4522     """
4523     all_os = {}
4524     # we build here the list of nodes that didn't fail the RPC (at RPC
4525     # level), so that nodes with a non-responding node daemon don't
4526     # make all OSes invalid
4527     good_nodes = [node_name for node_name in rlist
4528                   if not rlist[node_name].fail_msg]
4529     for node_name, nr in rlist.items():
4530       if nr.fail_msg or not nr.payload:
4531         continue
4532       for (name, path, status, diagnose, variants,
4533            params, api_versions) in nr.payload:
4534         if name not in all_os:
4535           # build a list of nodes for this os containing empty lists
4536           # for each node in node_list
4537           all_os[name] = {}
4538           for nname in good_nodes:
4539             all_os[name][nname] = []
4540         # convert params from [name, help] to (name, help)
4541         params = [tuple(v) for v in params]
4542         all_os[name][node_name].append((path, status, diagnose,
4543                                         variants, params, api_versions))
4544     return all_os
4545
4546   def _GetQueryData(self, lu):
4547     """Computes the list of nodes and their attributes.
4548
4549     """
4550     # Locking is not used
4551     assert not (compat.any(lu.glm.is_owned(level)
4552                            for level in locking.LEVELS
4553                            if level != locking.LEVEL_CLUSTER) or
4554                 self.do_locking or self.use_locking)
4555
4556     valid_nodes = [node.name
4557                    for node in lu.cfg.GetAllNodesInfo().values()
4558                    if not node.offline and node.vm_capable]
4559     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4560     cluster = lu.cfg.GetClusterInfo()
4561
4562     data = {}
4563
4564     for (os_name, os_data) in pol.items():
4565       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4566                           hidden=(os_name in cluster.hidden_os),
4567                           blacklisted=(os_name in cluster.blacklisted_os))
4568
4569       variants = set()
4570       parameters = set()
4571       api_versions = set()
4572
4573       for idx, osl in enumerate(os_data.values()):
4574         info.valid = bool(info.valid and osl and osl[0][1])
4575         if not info.valid:
4576           break
4577
4578         (node_variants, node_params, node_api) = osl[0][3:6]
4579         if idx == 0:
4580           # First entry
4581           variants.update(node_variants)
4582           parameters.update(node_params)
4583           api_versions.update(node_api)
4584         else:
4585           # Filter out inconsistent values
4586           variants.intersection_update(node_variants)
4587           parameters.intersection_update(node_params)
4588           api_versions.intersection_update(node_api)
4589
4590       info.variants = list(variants)
4591       info.parameters = list(parameters)
4592       info.api_versions = list(api_versions)
4593
4594       data[os_name] = info
4595
4596     # Prepare data in requested order
4597     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4598             if name in data]
4599
4600
4601 class LUOsDiagnose(NoHooksLU):
4602   """Logical unit for OS diagnose/query.
4603
4604   """
4605   REQ_BGL = False
4606
4607   @staticmethod
4608   def _BuildFilter(fields, names):
4609     """Builds a filter for querying OSes.
4610
4611     """
4612     name_filter = qlang.MakeSimpleFilter("name", names)
4613
4614     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4615     # respective field is not requested
4616     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4617                      for fname in ["hidden", "blacklisted"]
4618                      if fname not in fields]
4619     if "valid" not in fields:
4620       status_filter.append([qlang.OP_TRUE, "valid"])
4621
4622     if status_filter:
4623       status_filter.insert(0, qlang.OP_AND)
4624     else:
4625       status_filter = None
4626
4627     if name_filter and status_filter:
4628       return [qlang.OP_AND, name_filter, status_filter]
4629     elif name_filter:
4630       return name_filter
4631     else:
4632       return status_filter
4633
4634   def CheckArguments(self):
4635     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4636                        self.op.output_fields, False)
4637
4638   def ExpandNames(self):
4639     self.oq.ExpandNames(self)
4640
4641   def Exec(self, feedback_fn):
4642     return self.oq.OldStyleQuery(self)
4643
4644
4645 class LUNodeRemove(LogicalUnit):
4646   """Logical unit for removing a node.
4647
4648   """
4649   HPATH = "node-remove"
4650   HTYPE = constants.HTYPE_NODE
4651
4652   def BuildHooksEnv(self):
4653     """Build hooks env.
4654
4655     This doesn't run on the target node in the pre phase as a failed
4656     node would then be impossible to remove.
4657
4658     """
4659     return {
4660       "OP_TARGET": self.op.node_name,
4661       "NODE_NAME": self.op.node_name,
4662       }
4663
4664   def BuildHooksNodes(self):
4665     """Build hooks nodes.
4666
4667     """
4668     all_nodes = self.cfg.GetNodeList()
4669     try:
4670       all_nodes.remove(self.op.node_name)
4671     except ValueError:
4672       logging.warning("Node '%s', which is about to be removed, was not found"
4673                       " in the list of all nodes", self.op.node_name)
4674     return (all_nodes, all_nodes)
4675
4676   def CheckPrereq(self):
4677     """Check prerequisites.
4678
4679     This checks:
4680      - the node exists in the configuration
4681      - it does not have primary or secondary instances
4682      - it's not the master
4683
4684     Any errors are signaled by raising errors.OpPrereqError.
4685
4686     """
4687     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4688     node = self.cfg.GetNodeInfo(self.op.node_name)
4689     assert node is not None
4690
4691     masternode = self.cfg.GetMasterNode()
4692     if node.name == masternode:
4693       raise errors.OpPrereqError("Node is the master node, failover to another"
4694                                  " node is required", errors.ECODE_INVAL)
4695
4696     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4697       if node.name in instance.all_nodes:
4698         raise errors.OpPrereqError("Instance %s is still running on the node,"
4699                                    " please remove first" % instance_name,
4700                                    errors.ECODE_INVAL)
4701     self.op.node_name = node.name
4702     self.node = node
4703
4704   def Exec(self, feedback_fn):
4705     """Removes the node from the cluster.
4706
4707     """
4708     node = self.node
4709     logging.info("Stopping the node daemon and removing configs from node %s",
4710                  node.name)
4711
4712     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4713
4714     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4715       "Not owning BGL"
4716
4717     # Promote nodes to master candidate as needed
4718     _AdjustCandidatePool(self, exceptions=[node.name])
4719     self.context.RemoveNode(node.name)
4720
4721     # Run post hooks on the node before it's removed
4722     _RunPostHook(self, node.name)
4723
4724     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4725     msg = result.fail_msg
4726     if msg:
4727       self.LogWarning("Errors encountered on the remote node while leaving"
4728                       " the cluster: %s", msg)
4729
4730     # Remove node from our /etc/hosts
4731     if self.cfg.GetClusterInfo().modify_etc_hosts:
4732       master_node = self.cfg.GetMasterNode()
4733       result = self.rpc.call_etc_hosts_modify(master_node,
4734                                               constants.ETC_HOSTS_REMOVE,
4735                                               node.name, None)
4736       result.Raise("Can't update hosts file with new host data")
4737       _RedistributeAncillaryFiles(self)
4738
4739
4740 class _NodeQuery(_QueryBase):
4741   FIELDS = query.NODE_FIELDS
4742
4743   def ExpandNames(self, lu):
4744     lu.needed_locks = {}
4745     lu.share_locks = _ShareAll()
4746
4747     if self.names:
4748       self.wanted = _GetWantedNodes(lu, self.names)
4749     else:
4750       self.wanted = locking.ALL_SET
4751
4752     self.do_locking = (self.use_locking and
4753                        query.NQ_LIVE in self.requested_data)
4754
4755     if self.do_locking:
4756       # If any non-static field is requested we need to lock the nodes
4757       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4758
4759   def DeclareLocks(self, lu, level):
4760     pass
4761
4762   def _GetQueryData(self, lu):
4763     """Computes the list of nodes and their attributes.
4764
4765     """
4766     all_info = lu.cfg.GetAllNodesInfo()
4767
4768     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4769
4770     # Gather data as requested
4771     if query.NQ_LIVE in self.requested_data:
4772       # filter out non-vm_capable nodes
4773       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4774
4775       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4776                                         [lu.cfg.GetHypervisorType()])
4777       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4778                        for (name, nresult) in node_data.items()
4779                        if not nresult.fail_msg and nresult.payload)
4780     else:
4781       live_data = None
4782
4783     if query.NQ_INST in self.requested_data:
4784       node_to_primary = dict([(name, set()) for name in nodenames])
4785       node_to_secondary = dict([(name, set()) for name in nodenames])
4786
4787       inst_data = lu.cfg.GetAllInstancesInfo()
4788
4789       for inst in inst_data.values():
4790         if inst.primary_node in node_to_primary:
4791           node_to_primary[inst.primary_node].add(inst.name)
4792         for secnode in inst.secondary_nodes:
4793           if secnode in node_to_secondary:
4794             node_to_secondary[secnode].add(inst.name)
4795     else:
4796       node_to_primary = None
4797       node_to_secondary = None
4798
4799     if query.NQ_OOB in self.requested_data:
4800       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4801                          for name, node in all_info.iteritems())
4802     else:
4803       oob_support = None
4804
4805     if query.NQ_GROUP in self.requested_data:
4806       groups = lu.cfg.GetAllNodeGroupsInfo()
4807     else:
4808       groups = {}
4809
4810     return query.NodeQueryData([all_info[name] for name in nodenames],
4811                                live_data, lu.cfg.GetMasterNode(),
4812                                node_to_primary, node_to_secondary, groups,
4813                                oob_support, lu.cfg.GetClusterInfo())
4814
4815
4816 class LUNodeQuery(NoHooksLU):
4817   """Logical unit for querying nodes.
4818
4819   """
4820   # pylint: disable=W0142
4821   REQ_BGL = False
4822
4823   def CheckArguments(self):
4824     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4825                          self.op.output_fields, self.op.use_locking)
4826
4827   def ExpandNames(self):
4828     self.nq.ExpandNames(self)
4829
4830   def DeclareLocks(self, level):
4831     self.nq.DeclareLocks(self, level)
4832
4833   def Exec(self, feedback_fn):
4834     return self.nq.OldStyleQuery(self)
4835
4836
4837 class LUNodeQueryvols(NoHooksLU):
4838   """Logical unit for getting volumes on node(s).
4839
4840   """
4841   REQ_BGL = False
4842   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4843   _FIELDS_STATIC = utils.FieldSet("node")
4844
4845   def CheckArguments(self):
4846     _CheckOutputFields(static=self._FIELDS_STATIC,
4847                        dynamic=self._FIELDS_DYNAMIC,
4848                        selected=self.op.output_fields)
4849
4850   def ExpandNames(self):
4851     self.share_locks = _ShareAll()
4852     self.needed_locks = {}
4853
4854     if not self.op.nodes:
4855       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4856     else:
4857       self.needed_locks[locking.LEVEL_NODE] = \
4858         _GetWantedNodes(self, self.op.nodes)
4859
4860   def Exec(self, feedback_fn):
4861     """Computes the list of nodes and their attributes.
4862
4863     """
4864     nodenames = self.owned_locks(locking.LEVEL_NODE)
4865     volumes = self.rpc.call_node_volumes(nodenames)
4866
4867     ilist = self.cfg.GetAllInstancesInfo()
4868     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4869
4870     output = []
4871     for node in nodenames:
4872       nresult = volumes[node]
4873       if nresult.offline:
4874         continue
4875       msg = nresult.fail_msg
4876       if msg:
4877         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4878         continue
4879
4880       node_vols = sorted(nresult.payload,
4881                          key=operator.itemgetter("dev"))
4882
4883       for vol in node_vols:
4884         node_output = []
4885         for field in self.op.output_fields:
4886           if field == "node":
4887             val = node
4888           elif field == "phys":
4889             val = vol["dev"]
4890           elif field == "vg":
4891             val = vol["vg"]
4892           elif field == "name":
4893             val = vol["name"]
4894           elif field == "size":
4895             val = int(float(vol["size"]))
4896           elif field == "instance":
4897             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4898           else:
4899             raise errors.ParameterError(field)
4900           node_output.append(str(val))
4901
4902         output.append(node_output)
4903
4904     return output
4905
4906
4907 class LUNodeQueryStorage(NoHooksLU):
4908   """Logical unit for getting information on storage units on node(s).
4909
4910   """
4911   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4912   REQ_BGL = False
4913
4914   def CheckArguments(self):
4915     _CheckOutputFields(static=self._FIELDS_STATIC,
4916                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4917                        selected=self.op.output_fields)
4918
4919   def ExpandNames(self):
4920     self.share_locks = _ShareAll()
4921     self.needed_locks = {}
4922
4923     if self.op.nodes:
4924       self.needed_locks[locking.LEVEL_NODE] = \
4925         _GetWantedNodes(self, self.op.nodes)
4926     else:
4927       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4928
4929   def Exec(self, feedback_fn):
4930     """Computes the list of nodes and their attributes.
4931
4932     """
4933     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4934
4935     # Always get name to sort by
4936     if constants.SF_NAME in self.op.output_fields:
4937       fields = self.op.output_fields[:]
4938     else:
4939       fields = [constants.SF_NAME] + self.op.output_fields
4940
4941     # Never ask for node or type as it's only known to the LU
4942     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4943       while extra in fields:
4944         fields.remove(extra)
4945
4946     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4947     name_idx = field_idx[constants.SF_NAME]
4948
4949     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4950     data = self.rpc.call_storage_list(self.nodes,
4951                                       self.op.storage_type, st_args,
4952                                       self.op.name, fields)
4953
4954     result = []
4955
4956     for node in utils.NiceSort(self.nodes):
4957       nresult = data[node]
4958       if nresult.offline:
4959         continue
4960
4961       msg = nresult.fail_msg
4962       if msg:
4963         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4964         continue
4965
4966       rows = dict([(row[name_idx], row) for row in nresult.payload])
4967
4968       for name in utils.NiceSort(rows.keys()):
4969         row = rows[name]
4970
4971         out = []
4972
4973         for field in self.op.output_fields:
4974           if field == constants.SF_NODE:
4975             val = node
4976           elif field == constants.SF_TYPE:
4977             val = self.op.storage_type
4978           elif field in field_idx:
4979             val = row[field_idx[field]]
4980           else:
4981             raise errors.ParameterError(field)
4982
4983           out.append(val)
4984
4985         result.append(out)
4986
4987     return result
4988
4989
4990 class _InstanceQuery(_QueryBase):
4991   FIELDS = query.INSTANCE_FIELDS
4992
4993   def ExpandNames(self, lu):
4994     lu.needed_locks = {}
4995     lu.share_locks = _ShareAll()
4996
4997     if self.names:
4998       self.wanted = _GetWantedInstances(lu, self.names)
4999     else:
5000       self.wanted = locking.ALL_SET
5001
5002     self.do_locking = (self.use_locking and
5003                        query.IQ_LIVE in self.requested_data)
5004     if self.do_locking:
5005       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5006       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5007       lu.needed_locks[locking.LEVEL_NODE] = []
5008       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5009
5010     self.do_grouplocks = (self.do_locking and
5011                           query.IQ_NODES in self.requested_data)
5012
5013   def DeclareLocks(self, lu, level):
5014     if self.do_locking:
5015       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5016         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5017
5018         # Lock all groups used by instances optimistically; this requires going
5019         # via the node before it's locked, requiring verification later on
5020         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5021           set(group_uuid
5022               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5023               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5024       elif level == locking.LEVEL_NODE:
5025         lu._LockInstancesNodes() # pylint: disable=W0212
5026
5027   @staticmethod
5028   def _CheckGroupLocks(lu):
5029     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5030     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5031
5032     # Check if node groups for locked instances are still correct
5033     for instance_name in owned_instances:
5034       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5035
5036   def _GetQueryData(self, lu):
5037     """Computes the list of instances and their attributes.
5038
5039     """
5040     if self.do_grouplocks:
5041       self._CheckGroupLocks(lu)
5042
5043     cluster = lu.cfg.GetClusterInfo()
5044     all_info = lu.cfg.GetAllInstancesInfo()
5045
5046     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5047
5048     instance_list = [all_info[name] for name in instance_names]
5049     nodes = frozenset(itertools.chain(*(inst.all_nodes
5050                                         for inst in instance_list)))
5051     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5052     bad_nodes = []
5053     offline_nodes = []
5054     wrongnode_inst = set()
5055
5056     # Gather data as requested
5057     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5058       live_data = {}
5059       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5060       for name in nodes:
5061         result = node_data[name]
5062         if result.offline:
5063           # offline nodes will be in both lists
5064           assert result.fail_msg
5065           offline_nodes.append(name)
5066         if result.fail_msg:
5067           bad_nodes.append(name)
5068         elif result.payload:
5069           for inst in result.payload:
5070             if inst in all_info:
5071               if all_info[inst].primary_node == name:
5072                 live_data.update(result.payload)
5073               else:
5074                 wrongnode_inst.add(inst)
5075             else:
5076               # orphan instance; we don't list it here as we don't
5077               # handle this case yet in the output of instance listing
5078               logging.warning("Orphan instance '%s' found on node %s",
5079                               inst, name)
5080         # else no instance is alive
5081     else:
5082       live_data = {}
5083
5084     if query.IQ_DISKUSAGE in self.requested_data:
5085       disk_usage = dict((inst.name,
5086                          _ComputeDiskSize(inst.disk_template,
5087                                           [{constants.IDISK_SIZE: disk.size}
5088                                            for disk in inst.disks]))
5089                         for inst in instance_list)
5090     else:
5091       disk_usage = None
5092
5093     if query.IQ_CONSOLE in self.requested_data:
5094       consinfo = {}
5095       for inst in instance_list:
5096         if inst.name in live_data:
5097           # Instance is running
5098           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5099         else:
5100           consinfo[inst.name] = None
5101       assert set(consinfo.keys()) == set(instance_names)
5102     else:
5103       consinfo = None
5104
5105     if query.IQ_NODES in self.requested_data:
5106       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5107                                             instance_list)))
5108       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5109       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5110                     for uuid in set(map(operator.attrgetter("group"),
5111                                         nodes.values())))
5112     else:
5113       nodes = None
5114       groups = None
5115
5116     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5117                                    disk_usage, offline_nodes, bad_nodes,
5118                                    live_data, wrongnode_inst, consinfo,
5119                                    nodes, groups)
5120
5121
5122 class LUQuery(NoHooksLU):
5123   """Query for resources/items of a certain kind.
5124
5125   """
5126   # pylint: disable=W0142
5127   REQ_BGL = False
5128
5129   def CheckArguments(self):
5130     qcls = _GetQueryImplementation(self.op.what)
5131
5132     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5133
5134   def ExpandNames(self):
5135     self.impl.ExpandNames(self)
5136
5137   def DeclareLocks(self, level):
5138     self.impl.DeclareLocks(self, level)
5139
5140   def Exec(self, feedback_fn):
5141     return self.impl.NewStyleQuery(self)
5142
5143
5144 class LUQueryFields(NoHooksLU):
5145   """Query for resources/items of a certain kind.
5146
5147   """
5148   # pylint: disable=W0142
5149   REQ_BGL = False
5150
5151   def CheckArguments(self):
5152     self.qcls = _GetQueryImplementation(self.op.what)
5153
5154   def ExpandNames(self):
5155     self.needed_locks = {}
5156
5157   def Exec(self, feedback_fn):
5158     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5159
5160
5161 class LUNodeModifyStorage(NoHooksLU):
5162   """Logical unit for modifying a storage volume on a node.
5163
5164   """
5165   REQ_BGL = False
5166
5167   def CheckArguments(self):
5168     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5169
5170     storage_type = self.op.storage_type
5171
5172     try:
5173       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5174     except KeyError:
5175       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5176                                  " modified" % storage_type,
5177                                  errors.ECODE_INVAL)
5178
5179     diff = set(self.op.changes.keys()) - modifiable
5180     if diff:
5181       raise errors.OpPrereqError("The following fields can not be modified for"
5182                                  " storage units of type '%s': %r" %
5183                                  (storage_type, list(diff)),
5184                                  errors.ECODE_INVAL)
5185
5186   def ExpandNames(self):
5187     self.needed_locks = {
5188       locking.LEVEL_NODE: self.op.node_name,
5189       }
5190
5191   def Exec(self, feedback_fn):
5192     """Computes the list of nodes and their attributes.
5193
5194     """
5195     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5196     result = self.rpc.call_storage_modify(self.op.node_name,
5197                                           self.op.storage_type, st_args,
5198                                           self.op.name, self.op.changes)
5199     result.Raise("Failed to modify storage unit '%s' on %s" %
5200                  (self.op.name, self.op.node_name))
5201
5202
5203 class LUNodeAdd(LogicalUnit):
5204   """Logical unit for adding node to the cluster.
5205
5206   """
5207   HPATH = "node-add"
5208   HTYPE = constants.HTYPE_NODE
5209   _NFLAGS = ["master_capable", "vm_capable"]
5210
5211   def CheckArguments(self):
5212     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5213     # validate/normalize the node name
5214     self.hostname = netutils.GetHostname(name=self.op.node_name,
5215                                          family=self.primary_ip_family)
5216     self.op.node_name = self.hostname.name
5217
5218     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5219       raise errors.OpPrereqError("Cannot readd the master node",
5220                                  errors.ECODE_STATE)
5221
5222     if self.op.readd and self.op.group:
5223       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5224                                  " being readded", errors.ECODE_INVAL)
5225
5226   def BuildHooksEnv(self):
5227     """Build hooks env.
5228
5229     This will run on all nodes before, and on all nodes + the new node after.
5230
5231     """
5232     return {
5233       "OP_TARGET": self.op.node_name,
5234       "NODE_NAME": self.op.node_name,
5235       "NODE_PIP": self.op.primary_ip,
5236       "NODE_SIP": self.op.secondary_ip,
5237       "MASTER_CAPABLE": str(self.op.master_capable),
5238       "VM_CAPABLE": str(self.op.vm_capable),
5239       }
5240
5241   def BuildHooksNodes(self):
5242     """Build hooks nodes.
5243
5244     """
5245     # Exclude added node
5246     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5247     post_nodes = pre_nodes + [self.op.node_name, ]
5248
5249     return (pre_nodes, post_nodes)
5250
5251   def CheckPrereq(self):
5252     """Check prerequisites.
5253
5254     This checks:
5255      - the new node is not already in the config
5256      - it is resolvable
5257      - its parameters (single/dual homed) matches the cluster
5258
5259     Any errors are signaled by raising errors.OpPrereqError.
5260
5261     """
5262     cfg = self.cfg
5263     hostname = self.hostname
5264     node = hostname.name
5265     primary_ip = self.op.primary_ip = hostname.ip
5266     if self.op.secondary_ip is None:
5267       if self.primary_ip_family == netutils.IP6Address.family:
5268         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5269                                    " IPv4 address must be given as secondary",
5270                                    errors.ECODE_INVAL)
5271       self.op.secondary_ip = primary_ip
5272
5273     secondary_ip = self.op.secondary_ip
5274     if not netutils.IP4Address.IsValid(secondary_ip):
5275       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5276                                  " address" % secondary_ip, errors.ECODE_INVAL)
5277
5278     node_list = cfg.GetNodeList()
5279     if not self.op.readd and node in node_list:
5280       raise errors.OpPrereqError("Node %s is already in the configuration" %
5281                                  node, errors.ECODE_EXISTS)
5282     elif self.op.readd and node not in node_list:
5283       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5284                                  errors.ECODE_NOENT)
5285
5286     self.changed_primary_ip = False
5287
5288     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5289       if self.op.readd and node == existing_node_name:
5290         if existing_node.secondary_ip != secondary_ip:
5291           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5292                                      " address configuration as before",
5293                                      errors.ECODE_INVAL)
5294         if existing_node.primary_ip != primary_ip:
5295           self.changed_primary_ip = True
5296
5297         continue
5298
5299       if (existing_node.primary_ip == primary_ip or
5300           existing_node.secondary_ip == primary_ip or
5301           existing_node.primary_ip == secondary_ip or
5302           existing_node.secondary_ip == secondary_ip):
5303         raise errors.OpPrereqError("New node ip address(es) conflict with"
5304                                    " existing node %s" % existing_node.name,
5305                                    errors.ECODE_NOTUNIQUE)
5306
5307     # After this 'if' block, None is no longer a valid value for the
5308     # _capable op attributes
5309     if self.op.readd:
5310       old_node = self.cfg.GetNodeInfo(node)
5311       assert old_node is not None, "Can't retrieve locked node %s" % node
5312       for attr in self._NFLAGS:
5313         if getattr(self.op, attr) is None:
5314           setattr(self.op, attr, getattr(old_node, attr))
5315     else:
5316       for attr in self._NFLAGS:
5317         if getattr(self.op, attr) is None:
5318           setattr(self.op, attr, True)
5319
5320     if self.op.readd and not self.op.vm_capable:
5321       pri, sec = cfg.GetNodeInstances(node)
5322       if pri or sec:
5323         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5324                                    " flag set to false, but it already holds"
5325                                    " instances" % node,
5326                                    errors.ECODE_STATE)
5327
5328     # check that the type of the node (single versus dual homed) is the
5329     # same as for the master
5330     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5331     master_singlehomed = myself.secondary_ip == myself.primary_ip
5332     newbie_singlehomed = secondary_ip == primary_ip
5333     if master_singlehomed != newbie_singlehomed:
5334       if master_singlehomed:
5335         raise errors.OpPrereqError("The master has no secondary ip but the"
5336                                    " new node has one",
5337                                    errors.ECODE_INVAL)
5338       else:
5339         raise errors.OpPrereqError("The master has a secondary ip but the"
5340                                    " new node doesn't have one",
5341                                    errors.ECODE_INVAL)
5342
5343     # checks reachability
5344     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5345       raise errors.OpPrereqError("Node not reachable by ping",
5346                                  errors.ECODE_ENVIRON)
5347
5348     if not newbie_singlehomed:
5349       # check reachability from my secondary ip to newbie's secondary ip
5350       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5351                            source=myself.secondary_ip):
5352         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5353                                    " based ping to node daemon port",
5354                                    errors.ECODE_ENVIRON)
5355
5356     if self.op.readd:
5357       exceptions = [node]
5358     else:
5359       exceptions = []
5360
5361     if self.op.master_capable:
5362       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5363     else:
5364       self.master_candidate = False
5365
5366     if self.op.readd:
5367       self.new_node = old_node
5368     else:
5369       node_group = cfg.LookupNodeGroup(self.op.group)
5370       self.new_node = objects.Node(name=node,
5371                                    primary_ip=primary_ip,
5372                                    secondary_ip=secondary_ip,
5373                                    master_candidate=self.master_candidate,
5374                                    offline=False, drained=False,
5375                                    group=node_group)
5376
5377     if self.op.ndparams:
5378       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5379
5380   def Exec(self, feedback_fn):
5381     """Adds the new node to the cluster.
5382
5383     """
5384     new_node = self.new_node
5385     node = new_node.name
5386
5387     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5388       "Not owning BGL"
5389
5390     # We adding a new node so we assume it's powered
5391     new_node.powered = True
5392
5393     # for re-adds, reset the offline/drained/master-candidate flags;
5394     # we need to reset here, otherwise offline would prevent RPC calls
5395     # later in the procedure; this also means that if the re-add
5396     # fails, we are left with a non-offlined, broken node
5397     if self.op.readd:
5398       new_node.drained = new_node.offline = False # pylint: disable=W0201
5399       self.LogInfo("Readding a node, the offline/drained flags were reset")
5400       # if we demote the node, we do cleanup later in the procedure
5401       new_node.master_candidate = self.master_candidate
5402       if self.changed_primary_ip:
5403         new_node.primary_ip = self.op.primary_ip
5404
5405     # copy the master/vm_capable flags
5406     for attr in self._NFLAGS:
5407       setattr(new_node, attr, getattr(self.op, attr))
5408
5409     # notify the user about any possible mc promotion
5410     if new_node.master_candidate:
5411       self.LogInfo("Node will be a master candidate")
5412
5413     if self.op.ndparams:
5414       new_node.ndparams = self.op.ndparams
5415     else:
5416       new_node.ndparams = {}
5417
5418     # check connectivity
5419     result = self.rpc.call_version([node])[node]
5420     result.Raise("Can't get version information from node %s" % node)
5421     if constants.PROTOCOL_VERSION == result.payload:
5422       logging.info("Communication to node %s fine, sw version %s match",
5423                    node, result.payload)
5424     else:
5425       raise errors.OpExecError("Version mismatch master version %s,"
5426                                " node version %s" %
5427                                (constants.PROTOCOL_VERSION, result.payload))
5428
5429     # Add node to our /etc/hosts, and add key to known_hosts
5430     if self.cfg.GetClusterInfo().modify_etc_hosts:
5431       master_node = self.cfg.GetMasterNode()
5432       result = self.rpc.call_etc_hosts_modify(master_node,
5433                                               constants.ETC_HOSTS_ADD,
5434                                               self.hostname.name,
5435                                               self.hostname.ip)
5436       result.Raise("Can't update hosts file with new host data")
5437
5438     if new_node.secondary_ip != new_node.primary_ip:
5439       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5440                                False)
5441
5442     node_verify_list = [self.cfg.GetMasterNode()]
5443     node_verify_param = {
5444       constants.NV_NODELIST: ([node], {}),
5445       # TODO: do a node-net-test as well?
5446     }
5447
5448     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5449                                        self.cfg.GetClusterName())
5450     for verifier in node_verify_list:
5451       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5452       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5453       if nl_payload:
5454         for failed in nl_payload:
5455           feedback_fn("ssh/hostname verification failed"
5456                       " (checking from %s): %s" %
5457                       (verifier, nl_payload[failed]))
5458         raise errors.OpExecError("ssh/hostname verification failed")
5459
5460     if self.op.readd:
5461       _RedistributeAncillaryFiles(self)
5462       self.context.ReaddNode(new_node)
5463       # make sure we redistribute the config
5464       self.cfg.Update(new_node, feedback_fn)
5465       # and make sure the new node will not have old files around
5466       if not new_node.master_candidate:
5467         result = self.rpc.call_node_demote_from_mc(new_node.name)
5468         msg = result.fail_msg
5469         if msg:
5470           self.LogWarning("Node failed to demote itself from master"
5471                           " candidate status: %s" % msg)
5472     else:
5473       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5474                                   additional_vm=self.op.vm_capable)
5475       self.context.AddNode(new_node, self.proc.GetECId())
5476
5477
5478 class LUNodeSetParams(LogicalUnit):
5479   """Modifies the parameters of a node.
5480
5481   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5482       to the node role (as _ROLE_*)
5483   @cvar _R2F: a dictionary from node role to tuples of flags
5484   @cvar _FLAGS: a list of attribute names corresponding to the flags
5485
5486   """
5487   HPATH = "node-modify"
5488   HTYPE = constants.HTYPE_NODE
5489   REQ_BGL = False
5490   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5491   _F2R = {
5492     (True, False, False): _ROLE_CANDIDATE,
5493     (False, True, False): _ROLE_DRAINED,
5494     (False, False, True): _ROLE_OFFLINE,
5495     (False, False, False): _ROLE_REGULAR,
5496     }
5497   _R2F = dict((v, k) for k, v in _F2R.items())
5498   _FLAGS = ["master_candidate", "drained", "offline"]
5499
5500   def CheckArguments(self):
5501     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5502     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5503                 self.op.master_capable, self.op.vm_capable,
5504                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5505                 self.op.disk_state]
5506     if all_mods.count(None) == len(all_mods):
5507       raise errors.OpPrereqError("Please pass at least one modification",
5508                                  errors.ECODE_INVAL)
5509     if all_mods.count(True) > 1:
5510       raise errors.OpPrereqError("Can't set the node into more than one"
5511                                  " state at the same time",
5512                                  errors.ECODE_INVAL)
5513
5514     # Boolean value that tells us whether we might be demoting from MC
5515     self.might_demote = (self.op.master_candidate == False or
5516                          self.op.offline == True or
5517                          self.op.drained == True or
5518                          self.op.master_capable == False)
5519
5520     if self.op.secondary_ip:
5521       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5522         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5523                                    " address" % self.op.secondary_ip,
5524                                    errors.ECODE_INVAL)
5525
5526     self.lock_all = self.op.auto_promote and self.might_demote
5527     self.lock_instances = self.op.secondary_ip is not None
5528
5529   def _InstanceFilter(self, instance):
5530     """Filter for getting affected instances.
5531
5532     """
5533     return (instance.disk_template in constants.DTS_INT_MIRROR and
5534             self.op.node_name in instance.all_nodes)
5535
5536   def ExpandNames(self):
5537     if self.lock_all:
5538       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5539     else:
5540       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5541
5542     # Since modifying a node can have severe effects on currently running
5543     # operations the resource lock is at least acquired in shared mode
5544     self.needed_locks[locking.LEVEL_NODE_RES] = \
5545       self.needed_locks[locking.LEVEL_NODE]
5546
5547     # Get node resource and instance locks in shared mode; they are not used
5548     # for anything but read-only access
5549     self.share_locks[locking.LEVEL_NODE_RES] = 1
5550     self.share_locks[locking.LEVEL_INSTANCE] = 1
5551
5552     if self.lock_instances:
5553       self.needed_locks[locking.LEVEL_INSTANCE] = \
5554         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5555
5556   def BuildHooksEnv(self):
5557     """Build hooks env.
5558
5559     This runs on the master node.
5560
5561     """
5562     return {
5563       "OP_TARGET": self.op.node_name,
5564       "MASTER_CANDIDATE": str(self.op.master_candidate),
5565       "OFFLINE": str(self.op.offline),
5566       "DRAINED": str(self.op.drained),
5567       "MASTER_CAPABLE": str(self.op.master_capable),
5568       "VM_CAPABLE": str(self.op.vm_capable),
5569       }
5570
5571   def BuildHooksNodes(self):
5572     """Build hooks nodes.
5573
5574     """
5575     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5576     return (nl, nl)
5577
5578   def CheckPrereq(self):
5579     """Check prerequisites.
5580
5581     This only checks the instance list against the existing names.
5582
5583     """
5584     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5585
5586     if self.lock_instances:
5587       affected_instances = \
5588         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5589
5590       # Verify instance locks
5591       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5592       wanted_instances = frozenset(affected_instances.keys())
5593       if wanted_instances - owned_instances:
5594         raise errors.OpPrereqError("Instances affected by changing node %s's"
5595                                    " secondary IP address have changed since"
5596                                    " locks were acquired, wanted '%s', have"
5597                                    " '%s'; retry the operation" %
5598                                    (self.op.node_name,
5599                                     utils.CommaJoin(wanted_instances),
5600                                     utils.CommaJoin(owned_instances)),
5601                                    errors.ECODE_STATE)
5602     else:
5603       affected_instances = None
5604
5605     if (self.op.master_candidate is not None or
5606         self.op.drained is not None or
5607         self.op.offline is not None):
5608       # we can't change the master's node flags
5609       if self.op.node_name == self.cfg.GetMasterNode():
5610         raise errors.OpPrereqError("The master role can be changed"
5611                                    " only via master-failover",
5612                                    errors.ECODE_INVAL)
5613
5614     if self.op.master_candidate and not node.master_capable:
5615       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5616                                  " it a master candidate" % node.name,
5617                                  errors.ECODE_STATE)
5618
5619     if self.op.vm_capable == False:
5620       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5621       if ipri or isec:
5622         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5623                                    " the vm_capable flag" % node.name,
5624                                    errors.ECODE_STATE)
5625
5626     if node.master_candidate and self.might_demote and not self.lock_all:
5627       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5628       # check if after removing the current node, we're missing master
5629       # candidates
5630       (mc_remaining, mc_should, _) = \
5631           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5632       if mc_remaining < mc_should:
5633         raise errors.OpPrereqError("Not enough master candidates, please"
5634                                    " pass auto promote option to allow"
5635                                    " promotion", errors.ECODE_STATE)
5636
5637     self.old_flags = old_flags = (node.master_candidate,
5638                                   node.drained, node.offline)
5639     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5640     self.old_role = old_role = self._F2R[old_flags]
5641
5642     # Check for ineffective changes
5643     for attr in self._FLAGS:
5644       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5645         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5646         setattr(self.op, attr, None)
5647
5648     # Past this point, any flag change to False means a transition
5649     # away from the respective state, as only real changes are kept
5650
5651     # TODO: We might query the real power state if it supports OOB
5652     if _SupportsOob(self.cfg, node):
5653       if self.op.offline is False and not (node.powered or
5654                                            self.op.powered == True):
5655         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5656                                     " offline status can be reset") %
5657                                    self.op.node_name)
5658     elif self.op.powered is not None:
5659       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5660                                   " as it does not support out-of-band"
5661                                   " handling") % self.op.node_name)
5662
5663     # If we're being deofflined/drained, we'll MC ourself if needed
5664     if (self.op.drained == False or self.op.offline == False or
5665         (self.op.master_capable and not node.master_capable)):
5666       if _DecideSelfPromotion(self):
5667         self.op.master_candidate = True
5668         self.LogInfo("Auto-promoting node to master candidate")
5669
5670     # If we're no longer master capable, we'll demote ourselves from MC
5671     if self.op.master_capable == False and node.master_candidate:
5672       self.LogInfo("Demoting from master candidate")
5673       self.op.master_candidate = False
5674
5675     # Compute new role
5676     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5677     if self.op.master_candidate:
5678       new_role = self._ROLE_CANDIDATE
5679     elif self.op.drained:
5680       new_role = self._ROLE_DRAINED
5681     elif self.op.offline:
5682       new_role = self._ROLE_OFFLINE
5683     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5684       # False is still in new flags, which means we're un-setting (the
5685       # only) True flag
5686       new_role = self._ROLE_REGULAR
5687     else: # no new flags, nothing, keep old role
5688       new_role = old_role
5689
5690     self.new_role = new_role
5691
5692     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5693       # Trying to transition out of offline status
5694       # TODO: Use standard RPC runner, but make sure it works when the node is
5695       # still marked offline
5696       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5697       if result.fail_msg:
5698         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5699                                    " to report its version: %s" %
5700                                    (node.name, result.fail_msg),
5701                                    errors.ECODE_STATE)
5702       else:
5703         self.LogWarning("Transitioning node from offline to online state"
5704                         " without using re-add. Please make sure the node"
5705                         " is healthy!")
5706
5707     if self.op.secondary_ip:
5708       # Ok even without locking, because this can't be changed by any LU
5709       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5710       master_singlehomed = master.secondary_ip == master.primary_ip
5711       if master_singlehomed and self.op.secondary_ip:
5712         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5713                                    " homed cluster", errors.ECODE_INVAL)
5714
5715       assert not (frozenset(affected_instances) -
5716                   self.owned_locks(locking.LEVEL_INSTANCE))
5717
5718       if node.offline:
5719         if affected_instances:
5720           raise errors.OpPrereqError("Cannot change secondary IP address:"
5721                                      " offline node has instances (%s)"
5722                                      " configured to use it" %
5723                                      utils.CommaJoin(affected_instances.keys()))
5724       else:
5725         # On online nodes, check that no instances are running, and that
5726         # the node has the new ip and we can reach it.
5727         for instance in affected_instances.values():
5728           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5729                               msg="cannot change secondary ip")
5730
5731         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5732         if master.name != node.name:
5733           # check reachability from master secondary ip to new secondary ip
5734           if not netutils.TcpPing(self.op.secondary_ip,
5735                                   constants.DEFAULT_NODED_PORT,
5736                                   source=master.secondary_ip):
5737             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5738                                        " based ping to node daemon port",
5739                                        errors.ECODE_ENVIRON)
5740
5741     if self.op.ndparams:
5742       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5743       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5744       self.new_ndparams = new_ndparams
5745
5746     if self.op.hv_state:
5747       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5748                                                  self.node.hv_state_static)
5749
5750     if self.op.disk_state:
5751       self.new_disk_state = \
5752         _MergeAndVerifyDiskState(self.op.disk_state,
5753                                  self.node.disk_state_static)
5754
5755   def Exec(self, feedback_fn):
5756     """Modifies a node.
5757
5758     """
5759     node = self.node
5760     old_role = self.old_role
5761     new_role = self.new_role
5762
5763     result = []
5764
5765     if self.op.ndparams:
5766       node.ndparams = self.new_ndparams
5767
5768     if self.op.powered is not None:
5769       node.powered = self.op.powered
5770
5771     if self.op.hv_state:
5772       node.hv_state_static = self.new_hv_state
5773
5774     if self.op.disk_state:
5775       node.disk_state_static = self.new_disk_state
5776
5777     for attr in ["master_capable", "vm_capable"]:
5778       val = getattr(self.op, attr)
5779       if val is not None:
5780         setattr(node, attr, val)
5781         result.append((attr, str(val)))
5782
5783     if new_role != old_role:
5784       # Tell the node to demote itself, if no longer MC and not offline
5785       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5786         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5787         if msg:
5788           self.LogWarning("Node failed to demote itself: %s", msg)
5789
5790       new_flags = self._R2F[new_role]
5791       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5792         if of != nf:
5793           result.append((desc, str(nf)))
5794       (node.master_candidate, node.drained, node.offline) = new_flags
5795
5796       # we locked all nodes, we adjust the CP before updating this node
5797       if self.lock_all:
5798         _AdjustCandidatePool(self, [node.name])
5799
5800     if self.op.secondary_ip:
5801       node.secondary_ip = self.op.secondary_ip
5802       result.append(("secondary_ip", self.op.secondary_ip))
5803
5804     # this will trigger configuration file update, if needed
5805     self.cfg.Update(node, feedback_fn)
5806
5807     # this will trigger job queue propagation or cleanup if the mc
5808     # flag changed
5809     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5810       self.context.ReaddNode(node)
5811
5812     return result
5813
5814
5815 class LUNodePowercycle(NoHooksLU):
5816   """Powercycles a node.
5817
5818   """
5819   REQ_BGL = False
5820
5821   def CheckArguments(self):
5822     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5823     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5824       raise errors.OpPrereqError("The node is the master and the force"
5825                                  " parameter was not set",
5826                                  errors.ECODE_INVAL)
5827
5828   def ExpandNames(self):
5829     """Locking for PowercycleNode.
5830
5831     This is a last-resort option and shouldn't block on other
5832     jobs. Therefore, we grab no locks.
5833
5834     """
5835     self.needed_locks = {}
5836
5837   def Exec(self, feedback_fn):
5838     """Reboots a node.
5839
5840     """
5841     result = self.rpc.call_node_powercycle(self.op.node_name,
5842                                            self.cfg.GetHypervisorType())
5843     result.Raise("Failed to schedule the reboot")
5844     return result.payload
5845
5846
5847 class LUClusterQuery(NoHooksLU):
5848   """Query cluster configuration.
5849
5850   """
5851   REQ_BGL = False
5852
5853   def ExpandNames(self):
5854     self.needed_locks = {}
5855
5856   def Exec(self, feedback_fn):
5857     """Return cluster config.
5858
5859     """
5860     cluster = self.cfg.GetClusterInfo()
5861     os_hvp = {}
5862
5863     # Filter just for enabled hypervisors
5864     for os_name, hv_dict in cluster.os_hvp.items():
5865       os_hvp[os_name] = {}
5866       for hv_name, hv_params in hv_dict.items():
5867         if hv_name in cluster.enabled_hypervisors:
5868           os_hvp[os_name][hv_name] = hv_params
5869
5870     # Convert ip_family to ip_version
5871     primary_ip_version = constants.IP4_VERSION
5872     if cluster.primary_ip_family == netutils.IP6Address.family:
5873       primary_ip_version = constants.IP6_VERSION
5874
5875     result = {
5876       "software_version": constants.RELEASE_VERSION,
5877       "protocol_version": constants.PROTOCOL_VERSION,
5878       "config_version": constants.CONFIG_VERSION,
5879       "os_api_version": max(constants.OS_API_VERSIONS),
5880       "export_version": constants.EXPORT_VERSION,
5881       "architecture": (platform.architecture()[0], platform.machine()),
5882       "name": cluster.cluster_name,
5883       "master": cluster.master_node,
5884       "default_hypervisor": cluster.primary_hypervisor,
5885       "enabled_hypervisors": cluster.enabled_hypervisors,
5886       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5887                         for hypervisor_name in cluster.enabled_hypervisors]),
5888       "os_hvp": os_hvp,
5889       "beparams": cluster.beparams,
5890       "osparams": cluster.osparams,
5891       "ipolicy": cluster.ipolicy,
5892       "nicparams": cluster.nicparams,
5893       "ndparams": cluster.ndparams,
5894       "candidate_pool_size": cluster.candidate_pool_size,
5895       "master_netdev": cluster.master_netdev,
5896       "master_netmask": cluster.master_netmask,
5897       "use_external_mip_script": cluster.use_external_mip_script,
5898       "volume_group_name": cluster.volume_group_name,
5899       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5900       "file_storage_dir": cluster.file_storage_dir,
5901       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5902       "maintain_node_health": cluster.maintain_node_health,
5903       "ctime": cluster.ctime,
5904       "mtime": cluster.mtime,
5905       "uuid": cluster.uuid,
5906       "tags": list(cluster.GetTags()),
5907       "uid_pool": cluster.uid_pool,
5908       "default_iallocator": cluster.default_iallocator,
5909       "reserved_lvs": cluster.reserved_lvs,
5910       "primary_ip_version": primary_ip_version,
5911       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5912       "hidden_os": cluster.hidden_os,
5913       "blacklisted_os": cluster.blacklisted_os,
5914       }
5915
5916     return result
5917
5918
5919 class LUClusterConfigQuery(NoHooksLU):
5920   """Return configuration values.
5921
5922   """
5923   REQ_BGL = False
5924   _FIELDS_DYNAMIC = utils.FieldSet()
5925   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5926                                   "watcher_pause", "volume_group_name")
5927
5928   def CheckArguments(self):
5929     _CheckOutputFields(static=self._FIELDS_STATIC,
5930                        dynamic=self._FIELDS_DYNAMIC,
5931                        selected=self.op.output_fields)
5932
5933   def ExpandNames(self):
5934     self.needed_locks = {}
5935
5936   def Exec(self, feedback_fn):
5937     """Dump a representation of the cluster config to the standard output.
5938
5939     """
5940     values = []
5941     for field in self.op.output_fields:
5942       if field == "cluster_name":
5943         entry = self.cfg.GetClusterName()
5944       elif field == "master_node":
5945         entry = self.cfg.GetMasterNode()
5946       elif field == "drain_flag":
5947         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5948       elif field == "watcher_pause":
5949         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5950       elif field == "volume_group_name":
5951         entry = self.cfg.GetVGName()
5952       else:
5953         raise errors.ParameterError(field)
5954       values.append(entry)
5955     return values
5956
5957
5958 class LUInstanceActivateDisks(NoHooksLU):
5959   """Bring up an instance's disks.
5960
5961   """
5962   REQ_BGL = False
5963
5964   def ExpandNames(self):
5965     self._ExpandAndLockInstance()
5966     self.needed_locks[locking.LEVEL_NODE] = []
5967     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5968
5969   def DeclareLocks(self, level):
5970     if level == locking.LEVEL_NODE:
5971       self._LockInstancesNodes()
5972
5973   def CheckPrereq(self):
5974     """Check prerequisites.
5975
5976     This checks that the instance is in the cluster.
5977
5978     """
5979     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5980     assert self.instance is not None, \
5981       "Cannot retrieve locked instance %s" % self.op.instance_name
5982     _CheckNodeOnline(self, self.instance.primary_node)
5983
5984   def Exec(self, feedback_fn):
5985     """Activate the disks.
5986
5987     """
5988     disks_ok, disks_info = \
5989               _AssembleInstanceDisks(self, self.instance,
5990                                      ignore_size=self.op.ignore_size)
5991     if not disks_ok:
5992       raise errors.OpExecError("Cannot activate block devices")
5993
5994     return disks_info
5995
5996
5997 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5998                            ignore_size=False):
5999   """Prepare the block devices for an instance.
6000
6001   This sets up the block devices on all nodes.
6002
6003   @type lu: L{LogicalUnit}
6004   @param lu: the logical unit on whose behalf we execute
6005   @type instance: L{objects.Instance}
6006   @param instance: the instance for whose disks we assemble
6007   @type disks: list of L{objects.Disk} or None
6008   @param disks: which disks to assemble (or all, if None)
6009   @type ignore_secondaries: boolean
6010   @param ignore_secondaries: if true, errors on secondary nodes
6011       won't result in an error return from the function
6012   @type ignore_size: boolean
6013   @param ignore_size: if true, the current known size of the disk
6014       will not be used during the disk activation, useful for cases
6015       when the size is wrong
6016   @return: False if the operation failed, otherwise a list of
6017       (host, instance_visible_name, node_visible_name)
6018       with the mapping from node devices to instance devices
6019
6020   """
6021   device_info = []
6022   disks_ok = True
6023   iname = instance.name
6024   disks = _ExpandCheckDisks(instance, disks)
6025
6026   # With the two passes mechanism we try to reduce the window of
6027   # opportunity for the race condition of switching DRBD to primary
6028   # before handshaking occured, but we do not eliminate it
6029
6030   # The proper fix would be to wait (with some limits) until the
6031   # connection has been made and drbd transitions from WFConnection
6032   # into any other network-connected state (Connected, SyncTarget,
6033   # SyncSource, etc.)
6034
6035   # 1st pass, assemble on all nodes in secondary mode
6036   for idx, inst_disk in enumerate(disks):
6037     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6038       if ignore_size:
6039         node_disk = node_disk.Copy()
6040         node_disk.UnsetSize()
6041       lu.cfg.SetDiskID(node_disk, node)
6042       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6043       msg = result.fail_msg
6044       if msg:
6045         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6046                            " (is_primary=False, pass=1): %s",
6047                            inst_disk.iv_name, node, msg)
6048         if not ignore_secondaries:
6049           disks_ok = False
6050
6051   # FIXME: race condition on drbd migration to primary
6052
6053   # 2nd pass, do only the primary node
6054   for idx, inst_disk in enumerate(disks):
6055     dev_path = None
6056
6057     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6058       if node != instance.primary_node:
6059         continue
6060       if ignore_size:
6061         node_disk = node_disk.Copy()
6062         node_disk.UnsetSize()
6063       lu.cfg.SetDiskID(node_disk, node)
6064       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6065       msg = result.fail_msg
6066       if msg:
6067         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6068                            " (is_primary=True, pass=2): %s",
6069                            inst_disk.iv_name, node, msg)
6070         disks_ok = False
6071       else:
6072         dev_path = result.payload
6073
6074     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6075
6076   # leave the disks configured for the primary node
6077   # this is a workaround that would be fixed better by
6078   # improving the logical/physical id handling
6079   for disk in disks:
6080     lu.cfg.SetDiskID(disk, instance.primary_node)
6081
6082   return disks_ok, device_info
6083
6084
6085 def _StartInstanceDisks(lu, instance, force):
6086   """Start the disks of an instance.
6087
6088   """
6089   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6090                                            ignore_secondaries=force)
6091   if not disks_ok:
6092     _ShutdownInstanceDisks(lu, instance)
6093     if force is not None and not force:
6094       lu.proc.LogWarning("", hint="If the message above refers to a"
6095                          " secondary node,"
6096                          " you can retry the operation using '--force'.")
6097     raise errors.OpExecError("Disk consistency error")
6098
6099
6100 class LUInstanceDeactivateDisks(NoHooksLU):
6101   """Shutdown an instance's disks.
6102
6103   """
6104   REQ_BGL = False
6105
6106   def ExpandNames(self):
6107     self._ExpandAndLockInstance()
6108     self.needed_locks[locking.LEVEL_NODE] = []
6109     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6110
6111   def DeclareLocks(self, level):
6112     if level == locking.LEVEL_NODE:
6113       self._LockInstancesNodes()
6114
6115   def CheckPrereq(self):
6116     """Check prerequisites.
6117
6118     This checks that the instance is in the cluster.
6119
6120     """
6121     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6122     assert self.instance is not None, \
6123       "Cannot retrieve locked instance %s" % self.op.instance_name
6124
6125   def Exec(self, feedback_fn):
6126     """Deactivate the disks
6127
6128     """
6129     instance = self.instance
6130     if self.op.force:
6131       _ShutdownInstanceDisks(self, instance)
6132     else:
6133       _SafeShutdownInstanceDisks(self, instance)
6134
6135
6136 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6137   """Shutdown block devices of an instance.
6138
6139   This function checks if an instance is running, before calling
6140   _ShutdownInstanceDisks.
6141
6142   """
6143   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6144   _ShutdownInstanceDisks(lu, instance, disks=disks)
6145
6146
6147 def _ExpandCheckDisks(instance, disks):
6148   """Return the instance disks selected by the disks list
6149
6150   @type disks: list of L{objects.Disk} or None
6151   @param disks: selected disks
6152   @rtype: list of L{objects.Disk}
6153   @return: selected instance disks to act on
6154
6155   """
6156   if disks is None:
6157     return instance.disks
6158   else:
6159     if not set(disks).issubset(instance.disks):
6160       raise errors.ProgrammerError("Can only act on disks belonging to the"
6161                                    " target instance")
6162     return disks
6163
6164
6165 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6166   """Shutdown block devices of an instance.
6167
6168   This does the shutdown on all nodes of the instance.
6169
6170   If the ignore_primary is false, errors on the primary node are
6171   ignored.
6172
6173   """
6174   all_result = True
6175   disks = _ExpandCheckDisks(instance, disks)
6176
6177   for disk in disks:
6178     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6179       lu.cfg.SetDiskID(top_disk, node)
6180       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6181       msg = result.fail_msg
6182       if msg:
6183         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6184                       disk.iv_name, node, msg)
6185         if ((node == instance.primary_node and not ignore_primary) or
6186             (node != instance.primary_node and not result.offline)):
6187           all_result = False
6188   return all_result
6189
6190
6191 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6192   """Checks if a node has enough free memory.
6193
6194   This function check if a given node has the needed amount of free
6195   memory. In case the node has less memory or we cannot get the
6196   information from the node, this function raise an OpPrereqError
6197   exception.
6198
6199   @type lu: C{LogicalUnit}
6200   @param lu: a logical unit from which we get configuration data
6201   @type node: C{str}
6202   @param node: the node to check
6203   @type reason: C{str}
6204   @param reason: string to use in the error message
6205   @type requested: C{int}
6206   @param requested: the amount of memory in MiB to check for
6207   @type hypervisor_name: C{str}
6208   @param hypervisor_name: the hypervisor to ask for memory stats
6209   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6210       we cannot check the node
6211
6212   """
6213   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6214   nodeinfo[node].Raise("Can't get data from node %s" % node,
6215                        prereq=True, ecode=errors.ECODE_ENVIRON)
6216   (_, _, (hv_info, )) = nodeinfo[node].payload
6217
6218   free_mem = hv_info.get("memory_free", None)
6219   if not isinstance(free_mem, int):
6220     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6221                                " was '%s'" % (node, free_mem),
6222                                errors.ECODE_ENVIRON)
6223   if requested > free_mem:
6224     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6225                                " needed %s MiB, available %s MiB" %
6226                                (node, reason, requested, free_mem),
6227                                errors.ECODE_NORES)
6228
6229
6230 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6231   """Checks if nodes have enough free disk space in the all VGs.
6232
6233   This function check if all given nodes have the needed amount of
6234   free disk. In case any node has less disk or we cannot get the
6235   information from the node, this function raise an OpPrereqError
6236   exception.
6237
6238   @type lu: C{LogicalUnit}
6239   @param lu: a logical unit from which we get configuration data
6240   @type nodenames: C{list}
6241   @param nodenames: the list of node names to check
6242   @type req_sizes: C{dict}
6243   @param req_sizes: the hash of vg and corresponding amount of disk in
6244       MiB to check for
6245   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6246       or we cannot check the node
6247
6248   """
6249   for vg, req_size in req_sizes.items():
6250     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6251
6252
6253 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6254   """Checks if nodes have enough free disk space in the specified VG.
6255
6256   This function check if all given nodes have the needed amount of
6257   free disk. In case any node has less disk or we cannot get the
6258   information from the node, this function raise an OpPrereqError
6259   exception.
6260
6261   @type lu: C{LogicalUnit}
6262   @param lu: a logical unit from which we get configuration data
6263   @type nodenames: C{list}
6264   @param nodenames: the list of node names to check
6265   @type vg: C{str}
6266   @param vg: the volume group to check
6267   @type requested: C{int}
6268   @param requested: the amount of disk in MiB to check for
6269   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6270       or we cannot check the node
6271
6272   """
6273   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6274   for node in nodenames:
6275     info = nodeinfo[node]
6276     info.Raise("Cannot get current information from node %s" % node,
6277                prereq=True, ecode=errors.ECODE_ENVIRON)
6278     (_, (vg_info, ), _) = info.payload
6279     vg_free = vg_info.get("vg_free", None)
6280     if not isinstance(vg_free, int):
6281       raise errors.OpPrereqError("Can't compute free disk space on node"
6282                                  " %s for vg %s, result was '%s'" %
6283                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6284     if requested > vg_free:
6285       raise errors.OpPrereqError("Not enough disk space on target node %s"
6286                                  " vg %s: required %d MiB, available %d MiB" %
6287                                  (node, vg, requested, vg_free),
6288                                  errors.ECODE_NORES)
6289
6290
6291 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6292   """Checks if nodes have enough physical CPUs
6293
6294   This function checks if all given nodes have the needed number of
6295   physical CPUs. In case any node has less CPUs or we cannot get the
6296   information from the node, this function raises an OpPrereqError
6297   exception.
6298
6299   @type lu: C{LogicalUnit}
6300   @param lu: a logical unit from which we get configuration data
6301   @type nodenames: C{list}
6302   @param nodenames: the list of node names to check
6303   @type requested: C{int}
6304   @param requested: the minimum acceptable number of physical CPUs
6305   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6306       or we cannot check the node
6307
6308   """
6309   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6310   for node in nodenames:
6311     info = nodeinfo[node]
6312     info.Raise("Cannot get current information from node %s" % node,
6313                prereq=True, ecode=errors.ECODE_ENVIRON)
6314     (_, _, (hv_info, )) = info.payload
6315     num_cpus = hv_info.get("cpu_total", None)
6316     if not isinstance(num_cpus, int):
6317       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6318                                  " on node %s, result was '%s'" %
6319                                  (node, num_cpus), errors.ECODE_ENVIRON)
6320     if requested > num_cpus:
6321       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6322                                  "required" % (node, num_cpus, requested),
6323                                  errors.ECODE_NORES)
6324
6325
6326 class LUInstanceStartup(LogicalUnit):
6327   """Starts an instance.
6328
6329   """
6330   HPATH = "instance-start"
6331   HTYPE = constants.HTYPE_INSTANCE
6332   REQ_BGL = False
6333
6334   def CheckArguments(self):
6335     # extra beparams
6336     if self.op.beparams:
6337       # fill the beparams dict
6338       objects.UpgradeBeParams(self.op.beparams)
6339       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6340
6341   def ExpandNames(self):
6342     self._ExpandAndLockInstance()
6343
6344   def BuildHooksEnv(self):
6345     """Build hooks env.
6346
6347     This runs on master, primary and secondary nodes of the instance.
6348
6349     """
6350     env = {
6351       "FORCE": self.op.force,
6352       }
6353
6354     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6355
6356     return env
6357
6358   def BuildHooksNodes(self):
6359     """Build hooks nodes.
6360
6361     """
6362     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6363     return (nl, nl)
6364
6365   def CheckPrereq(self):
6366     """Check prerequisites.
6367
6368     This checks that the instance is in the cluster.
6369
6370     """
6371     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6372     assert self.instance is not None, \
6373       "Cannot retrieve locked instance %s" % self.op.instance_name
6374
6375     # extra hvparams
6376     if self.op.hvparams:
6377       # check hypervisor parameter syntax (locally)
6378       cluster = self.cfg.GetClusterInfo()
6379       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6380       filled_hvp = cluster.FillHV(instance)
6381       filled_hvp.update(self.op.hvparams)
6382       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6383       hv_type.CheckParameterSyntax(filled_hvp)
6384       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6385
6386     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6387
6388     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6389
6390     if self.primary_offline and self.op.ignore_offline_nodes:
6391       self.proc.LogWarning("Ignoring offline primary node")
6392
6393       if self.op.hvparams or self.op.beparams:
6394         self.proc.LogWarning("Overridden parameters are ignored")
6395     else:
6396       _CheckNodeOnline(self, instance.primary_node)
6397
6398       bep = self.cfg.GetClusterInfo().FillBE(instance)
6399
6400       # check bridges existence
6401       _CheckInstanceBridgesExist(self, instance)
6402
6403       remote_info = self.rpc.call_instance_info(instance.primary_node,
6404                                                 instance.name,
6405                                                 instance.hypervisor)
6406       remote_info.Raise("Error checking node %s" % instance.primary_node,
6407                         prereq=True, ecode=errors.ECODE_ENVIRON)
6408       if not remote_info.payload: # not running already
6409         _CheckNodeFreeMemory(self, instance.primary_node,
6410                              "starting instance %s" % instance.name,
6411                              bep[constants.BE_MAXMEM], instance.hypervisor)
6412
6413   def Exec(self, feedback_fn):
6414     """Start the instance.
6415
6416     """
6417     instance = self.instance
6418     force = self.op.force
6419
6420     if not self.op.no_remember:
6421       self.cfg.MarkInstanceUp(instance.name)
6422
6423     if self.primary_offline:
6424       assert self.op.ignore_offline_nodes
6425       self.proc.LogInfo("Primary node offline, marked instance as started")
6426     else:
6427       node_current = instance.primary_node
6428
6429       _StartInstanceDisks(self, instance, force)
6430
6431       result = \
6432         self.rpc.call_instance_start(node_current,
6433                                      (instance, self.op.hvparams,
6434                                       self.op.beparams),
6435                                      self.op.startup_paused)
6436       msg = result.fail_msg
6437       if msg:
6438         _ShutdownInstanceDisks(self, instance)
6439         raise errors.OpExecError("Could not start instance: %s" % msg)
6440
6441
6442 class LUInstanceReboot(LogicalUnit):
6443   """Reboot an instance.
6444
6445   """
6446   HPATH = "instance-reboot"
6447   HTYPE = constants.HTYPE_INSTANCE
6448   REQ_BGL = False
6449
6450   def ExpandNames(self):
6451     self._ExpandAndLockInstance()
6452
6453   def BuildHooksEnv(self):
6454     """Build hooks env.
6455
6456     This runs on master, primary and secondary nodes of the instance.
6457
6458     """
6459     env = {
6460       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6461       "REBOOT_TYPE": self.op.reboot_type,
6462       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6463       }
6464
6465     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6466
6467     return env
6468
6469   def BuildHooksNodes(self):
6470     """Build hooks nodes.
6471
6472     """
6473     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6474     return (nl, nl)
6475
6476   def CheckPrereq(self):
6477     """Check prerequisites.
6478
6479     This checks that the instance is in the cluster.
6480
6481     """
6482     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6483     assert self.instance is not None, \
6484       "Cannot retrieve locked instance %s" % self.op.instance_name
6485     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6486     _CheckNodeOnline(self, instance.primary_node)
6487
6488     # check bridges existence
6489     _CheckInstanceBridgesExist(self, instance)
6490
6491   def Exec(self, feedback_fn):
6492     """Reboot the instance.
6493
6494     """
6495     instance = self.instance
6496     ignore_secondaries = self.op.ignore_secondaries
6497     reboot_type = self.op.reboot_type
6498
6499     remote_info = self.rpc.call_instance_info(instance.primary_node,
6500                                               instance.name,
6501                                               instance.hypervisor)
6502     remote_info.Raise("Error checking node %s" % instance.primary_node)
6503     instance_running = bool(remote_info.payload)
6504
6505     node_current = instance.primary_node
6506
6507     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6508                                             constants.INSTANCE_REBOOT_HARD]:
6509       for disk in instance.disks:
6510         self.cfg.SetDiskID(disk, node_current)
6511       result = self.rpc.call_instance_reboot(node_current, instance,
6512                                              reboot_type,
6513                                              self.op.shutdown_timeout)
6514       result.Raise("Could not reboot instance")
6515     else:
6516       if instance_running:
6517         result = self.rpc.call_instance_shutdown(node_current, instance,
6518                                                  self.op.shutdown_timeout)
6519         result.Raise("Could not shutdown instance for full reboot")
6520         _ShutdownInstanceDisks(self, instance)
6521       else:
6522         self.LogInfo("Instance %s was already stopped, starting now",
6523                      instance.name)
6524       _StartInstanceDisks(self, instance, ignore_secondaries)
6525       result = self.rpc.call_instance_start(node_current,
6526                                             (instance, None, None), False)
6527       msg = result.fail_msg
6528       if msg:
6529         _ShutdownInstanceDisks(self, instance)
6530         raise errors.OpExecError("Could not start instance for"
6531                                  " full reboot: %s" % msg)
6532
6533     self.cfg.MarkInstanceUp(instance.name)
6534
6535
6536 class LUInstanceShutdown(LogicalUnit):
6537   """Shutdown an instance.
6538
6539   """
6540   HPATH = "instance-stop"
6541   HTYPE = constants.HTYPE_INSTANCE
6542   REQ_BGL = False
6543
6544   def ExpandNames(self):
6545     self._ExpandAndLockInstance()
6546
6547   def BuildHooksEnv(self):
6548     """Build hooks env.
6549
6550     This runs on master, primary and secondary nodes of the instance.
6551
6552     """
6553     env = _BuildInstanceHookEnvByObject(self, self.instance)
6554     env["TIMEOUT"] = self.op.timeout
6555     return env
6556
6557   def BuildHooksNodes(self):
6558     """Build hooks nodes.
6559
6560     """
6561     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6562     return (nl, nl)
6563
6564   def CheckPrereq(self):
6565     """Check prerequisites.
6566
6567     This checks that the instance is in the cluster.
6568
6569     """
6570     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6571     assert self.instance is not None, \
6572       "Cannot retrieve locked instance %s" % self.op.instance_name
6573
6574     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6575
6576     self.primary_offline = \
6577       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6578
6579     if self.primary_offline and self.op.ignore_offline_nodes:
6580       self.proc.LogWarning("Ignoring offline primary node")
6581     else:
6582       _CheckNodeOnline(self, self.instance.primary_node)
6583
6584   def Exec(self, feedback_fn):
6585     """Shutdown the instance.
6586
6587     """
6588     instance = self.instance
6589     node_current = instance.primary_node
6590     timeout = self.op.timeout
6591
6592     if not self.op.no_remember:
6593       self.cfg.MarkInstanceDown(instance.name)
6594
6595     if self.primary_offline:
6596       assert self.op.ignore_offline_nodes
6597       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6598     else:
6599       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6600       msg = result.fail_msg
6601       if msg:
6602         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6603
6604       _ShutdownInstanceDisks(self, instance)
6605
6606
6607 class LUInstanceReinstall(LogicalUnit):
6608   """Reinstall an instance.
6609
6610   """
6611   HPATH = "instance-reinstall"
6612   HTYPE = constants.HTYPE_INSTANCE
6613   REQ_BGL = False
6614
6615   def ExpandNames(self):
6616     self._ExpandAndLockInstance()
6617
6618   def BuildHooksEnv(self):
6619     """Build hooks env.
6620
6621     This runs on master, primary and secondary nodes of the instance.
6622
6623     """
6624     return _BuildInstanceHookEnvByObject(self, self.instance)
6625
6626   def BuildHooksNodes(self):
6627     """Build hooks nodes.
6628
6629     """
6630     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6631     return (nl, nl)
6632
6633   def CheckPrereq(self):
6634     """Check prerequisites.
6635
6636     This checks that the instance is in the cluster and is not running.
6637
6638     """
6639     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6640     assert instance is not None, \
6641       "Cannot retrieve locked instance %s" % self.op.instance_name
6642     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6643                      " offline, cannot reinstall")
6644     for node in instance.secondary_nodes:
6645       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6646                        " cannot reinstall")
6647
6648     if instance.disk_template == constants.DT_DISKLESS:
6649       raise errors.OpPrereqError("Instance '%s' has no disks" %
6650                                  self.op.instance_name,
6651                                  errors.ECODE_INVAL)
6652     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6653
6654     if self.op.os_type is not None:
6655       # OS verification
6656       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6657       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6658       instance_os = self.op.os_type
6659     else:
6660       instance_os = instance.os
6661
6662     nodelist = list(instance.all_nodes)
6663
6664     if self.op.osparams:
6665       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6666       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6667       self.os_inst = i_osdict # the new dict (without defaults)
6668     else:
6669       self.os_inst = None
6670
6671     self.instance = instance
6672
6673   def Exec(self, feedback_fn):
6674     """Reinstall the instance.
6675
6676     """
6677     inst = self.instance
6678
6679     if self.op.os_type is not None:
6680       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6681       inst.os = self.op.os_type
6682       # Write to configuration
6683       self.cfg.Update(inst, feedback_fn)
6684
6685     _StartInstanceDisks(self, inst, None)
6686     try:
6687       feedback_fn("Running the instance OS create scripts...")
6688       # FIXME: pass debug option from opcode to backend
6689       result = self.rpc.call_instance_os_add(inst.primary_node,
6690                                              (inst, self.os_inst), True,
6691                                              self.op.debug_level)
6692       result.Raise("Could not install OS for instance %s on node %s" %
6693                    (inst.name, inst.primary_node))
6694     finally:
6695       _ShutdownInstanceDisks(self, inst)
6696
6697
6698 class LUInstanceRecreateDisks(LogicalUnit):
6699   """Recreate an instance's missing disks.
6700
6701   """
6702   HPATH = "instance-recreate-disks"
6703   HTYPE = constants.HTYPE_INSTANCE
6704   REQ_BGL = False
6705
6706   def CheckArguments(self):
6707     # normalise the disk list
6708     self.op.disks = sorted(frozenset(self.op.disks))
6709
6710   def ExpandNames(self):
6711     self._ExpandAndLockInstance()
6712     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6713     if self.op.nodes:
6714       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6715       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6716     else:
6717       self.needed_locks[locking.LEVEL_NODE] = []
6718
6719   def DeclareLocks(self, level):
6720     if level == locking.LEVEL_NODE:
6721       # if we replace the nodes, we only need to lock the old primary,
6722       # otherwise we need to lock all nodes for disk re-creation
6723       primary_only = bool(self.op.nodes)
6724       self._LockInstancesNodes(primary_only=primary_only)
6725     elif level == locking.LEVEL_NODE_RES:
6726       # Copy node locks
6727       self.needed_locks[locking.LEVEL_NODE_RES] = \
6728         self.needed_locks[locking.LEVEL_NODE][:]
6729
6730   def BuildHooksEnv(self):
6731     """Build hooks env.
6732
6733     This runs on master, primary and secondary nodes of the instance.
6734
6735     """
6736     return _BuildInstanceHookEnvByObject(self, self.instance)
6737
6738   def BuildHooksNodes(self):
6739     """Build hooks nodes.
6740
6741     """
6742     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6743     return (nl, nl)
6744
6745   def CheckPrereq(self):
6746     """Check prerequisites.
6747
6748     This checks that the instance is in the cluster and is not running.
6749
6750     """
6751     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6752     assert instance is not None, \
6753       "Cannot retrieve locked instance %s" % self.op.instance_name
6754     if self.op.nodes:
6755       if len(self.op.nodes) != len(instance.all_nodes):
6756         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6757                                    " %d replacement nodes were specified" %
6758                                    (instance.name, len(instance.all_nodes),
6759                                     len(self.op.nodes)),
6760                                    errors.ECODE_INVAL)
6761       assert instance.disk_template != constants.DT_DRBD8 or \
6762           len(self.op.nodes) == 2
6763       assert instance.disk_template != constants.DT_PLAIN or \
6764           len(self.op.nodes) == 1
6765       primary_node = self.op.nodes[0]
6766     else:
6767       primary_node = instance.primary_node
6768     _CheckNodeOnline(self, primary_node)
6769
6770     if instance.disk_template == constants.DT_DISKLESS:
6771       raise errors.OpPrereqError("Instance '%s' has no disks" %
6772                                  self.op.instance_name, errors.ECODE_INVAL)
6773     # if we replace nodes *and* the old primary is offline, we don't
6774     # check
6775     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6776     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6777     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6778     if not (self.op.nodes and old_pnode.offline):
6779       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6780                           msg="cannot recreate disks")
6781
6782     if not self.op.disks:
6783       self.op.disks = range(len(instance.disks))
6784     else:
6785       for idx in self.op.disks:
6786         if idx >= len(instance.disks):
6787           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6788                                      errors.ECODE_INVAL)
6789     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6790       raise errors.OpPrereqError("Can't recreate disks partially and"
6791                                  " change the nodes at the same time",
6792                                  errors.ECODE_INVAL)
6793     self.instance = instance
6794
6795   def Exec(self, feedback_fn):
6796     """Recreate the disks.
6797
6798     """
6799     instance = self.instance
6800
6801     assert (self.owned_locks(locking.LEVEL_NODE) ==
6802             self.owned_locks(locking.LEVEL_NODE_RES))
6803
6804     to_skip = []
6805     mods = [] # keeps track of needed logical_id changes
6806
6807     for idx, disk in enumerate(instance.disks):
6808       if idx not in self.op.disks: # disk idx has not been passed in
6809         to_skip.append(idx)
6810         continue
6811       # update secondaries for disks, if needed
6812       if self.op.nodes:
6813         if disk.dev_type == constants.LD_DRBD8:
6814           # need to update the nodes and minors
6815           assert len(self.op.nodes) == 2
6816           assert len(disk.logical_id) == 6 # otherwise disk internals
6817                                            # have changed
6818           (_, _, old_port, _, _, old_secret) = disk.logical_id
6819           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6820           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6821                     new_minors[0], new_minors[1], old_secret)
6822           assert len(disk.logical_id) == len(new_id)
6823           mods.append((idx, new_id))
6824
6825     # now that we have passed all asserts above, we can apply the mods
6826     # in a single run (to avoid partial changes)
6827     for idx, new_id in mods:
6828       instance.disks[idx].logical_id = new_id
6829
6830     # change primary node, if needed
6831     if self.op.nodes:
6832       instance.primary_node = self.op.nodes[0]
6833       self.LogWarning("Changing the instance's nodes, you will have to"
6834                       " remove any disks left on the older nodes manually")
6835
6836     if self.op.nodes:
6837       self.cfg.Update(instance, feedback_fn)
6838
6839     _CreateDisks(self, instance, to_skip=to_skip)
6840
6841
6842 class LUInstanceRename(LogicalUnit):
6843   """Rename an instance.
6844
6845   """
6846   HPATH = "instance-rename"
6847   HTYPE = constants.HTYPE_INSTANCE
6848
6849   def CheckArguments(self):
6850     """Check arguments.
6851
6852     """
6853     if self.op.ip_check and not self.op.name_check:
6854       # TODO: make the ip check more flexible and not depend on the name check
6855       raise errors.OpPrereqError("IP address check requires a name check",
6856                                  errors.ECODE_INVAL)
6857
6858   def BuildHooksEnv(self):
6859     """Build hooks env.
6860
6861     This runs on master, primary and secondary nodes of the instance.
6862
6863     """
6864     env = _BuildInstanceHookEnvByObject(self, self.instance)
6865     env["INSTANCE_NEW_NAME"] = self.op.new_name
6866     return env
6867
6868   def BuildHooksNodes(self):
6869     """Build hooks nodes.
6870
6871     """
6872     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6873     return (nl, nl)
6874
6875   def CheckPrereq(self):
6876     """Check prerequisites.
6877
6878     This checks that the instance is in the cluster and is not running.
6879
6880     """
6881     self.op.instance_name = _ExpandInstanceName(self.cfg,
6882                                                 self.op.instance_name)
6883     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6884     assert instance is not None
6885     _CheckNodeOnline(self, instance.primary_node)
6886     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6887                         msg="cannot rename")
6888     self.instance = instance
6889
6890     new_name = self.op.new_name
6891     if self.op.name_check:
6892       hostname = netutils.GetHostname(name=new_name)
6893       if hostname.name != new_name:
6894         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6895                      hostname.name)
6896       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6897         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6898                                     " same as given hostname '%s'") %
6899                                     (hostname.name, self.op.new_name),
6900                                     errors.ECODE_INVAL)
6901       new_name = self.op.new_name = hostname.name
6902       if (self.op.ip_check and
6903           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6904         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6905                                    (hostname.ip, new_name),
6906                                    errors.ECODE_NOTUNIQUE)
6907
6908     instance_list = self.cfg.GetInstanceList()
6909     if new_name in instance_list and new_name != instance.name:
6910       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6911                                  new_name, errors.ECODE_EXISTS)
6912
6913   def Exec(self, feedback_fn):
6914     """Rename the instance.
6915
6916     """
6917     inst = self.instance
6918     old_name = inst.name
6919
6920     rename_file_storage = False
6921     if (inst.disk_template in constants.DTS_FILEBASED and
6922         self.op.new_name != inst.name):
6923       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6924       rename_file_storage = True
6925
6926     self.cfg.RenameInstance(inst.name, self.op.new_name)
6927     # Change the instance lock. This is definitely safe while we hold the BGL.
6928     # Otherwise the new lock would have to be added in acquired mode.
6929     assert self.REQ_BGL
6930     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6931     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6932
6933     # re-read the instance from the configuration after rename
6934     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6935
6936     if rename_file_storage:
6937       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6938       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6939                                                      old_file_storage_dir,
6940                                                      new_file_storage_dir)
6941       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6942                    " (but the instance has been renamed in Ganeti)" %
6943                    (inst.primary_node, old_file_storage_dir,
6944                     new_file_storage_dir))
6945
6946     _StartInstanceDisks(self, inst, None)
6947     try:
6948       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6949                                                  old_name, self.op.debug_level)
6950       msg = result.fail_msg
6951       if msg:
6952         msg = ("Could not run OS rename script for instance %s on node %s"
6953                " (but the instance has been renamed in Ganeti): %s" %
6954                (inst.name, inst.primary_node, msg))
6955         self.proc.LogWarning(msg)
6956     finally:
6957       _ShutdownInstanceDisks(self, inst)
6958
6959     return inst.name
6960
6961
6962 class LUInstanceRemove(LogicalUnit):
6963   """Remove an instance.
6964
6965   """
6966   HPATH = "instance-remove"
6967   HTYPE = constants.HTYPE_INSTANCE
6968   REQ_BGL = False
6969
6970   def ExpandNames(self):
6971     self._ExpandAndLockInstance()
6972     self.needed_locks[locking.LEVEL_NODE] = []
6973     self.needed_locks[locking.LEVEL_NODE_RES] = []
6974     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6975
6976   def DeclareLocks(self, level):
6977     if level == locking.LEVEL_NODE:
6978       self._LockInstancesNodes()
6979     elif level == locking.LEVEL_NODE_RES:
6980       # Copy node locks
6981       self.needed_locks[locking.LEVEL_NODE_RES] = \
6982         self.needed_locks[locking.LEVEL_NODE][:]
6983
6984   def BuildHooksEnv(self):
6985     """Build hooks env.
6986
6987     This runs on master, primary and secondary nodes of the instance.
6988
6989     """
6990     env = _BuildInstanceHookEnvByObject(self, self.instance)
6991     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6992     return env
6993
6994   def BuildHooksNodes(self):
6995     """Build hooks nodes.
6996
6997     """
6998     nl = [self.cfg.GetMasterNode()]
6999     nl_post = list(self.instance.all_nodes) + nl
7000     return (nl, nl_post)
7001
7002   def CheckPrereq(self):
7003     """Check prerequisites.
7004
7005     This checks that the instance is in the cluster.
7006
7007     """
7008     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7009     assert self.instance is not None, \
7010       "Cannot retrieve locked instance %s" % self.op.instance_name
7011
7012   def Exec(self, feedback_fn):
7013     """Remove the instance.
7014
7015     """
7016     instance = self.instance
7017     logging.info("Shutting down instance %s on node %s",
7018                  instance.name, instance.primary_node)
7019
7020     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7021                                              self.op.shutdown_timeout)
7022     msg = result.fail_msg
7023     if msg:
7024       if self.op.ignore_failures:
7025         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7026       else:
7027         raise errors.OpExecError("Could not shutdown instance %s on"
7028                                  " node %s: %s" %
7029                                  (instance.name, instance.primary_node, msg))
7030
7031     assert (self.owned_locks(locking.LEVEL_NODE) ==
7032             self.owned_locks(locking.LEVEL_NODE_RES))
7033     assert not (set(instance.all_nodes) -
7034                 self.owned_locks(locking.LEVEL_NODE)), \
7035       "Not owning correct locks"
7036
7037     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7038
7039
7040 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7041   """Utility function to remove an instance.
7042
7043   """
7044   logging.info("Removing block devices for instance %s", instance.name)
7045
7046   if not _RemoveDisks(lu, instance):
7047     if not ignore_failures:
7048       raise errors.OpExecError("Can't remove instance's disks")
7049     feedback_fn("Warning: can't remove instance's disks")
7050
7051   logging.info("Removing instance %s out of cluster config", instance.name)
7052
7053   lu.cfg.RemoveInstance(instance.name)
7054
7055   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7056     "Instance lock removal conflict"
7057
7058   # Remove lock for the instance
7059   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7060
7061
7062 class LUInstanceQuery(NoHooksLU):
7063   """Logical unit for querying instances.
7064
7065   """
7066   # pylint: disable=W0142
7067   REQ_BGL = False
7068
7069   def CheckArguments(self):
7070     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7071                              self.op.output_fields, self.op.use_locking)
7072
7073   def ExpandNames(self):
7074     self.iq.ExpandNames(self)
7075
7076   def DeclareLocks(self, level):
7077     self.iq.DeclareLocks(self, level)
7078
7079   def Exec(self, feedback_fn):
7080     return self.iq.OldStyleQuery(self)
7081
7082
7083 class LUInstanceFailover(LogicalUnit):
7084   """Failover an instance.
7085
7086   """
7087   HPATH = "instance-failover"
7088   HTYPE = constants.HTYPE_INSTANCE
7089   REQ_BGL = False
7090
7091   def CheckArguments(self):
7092     """Check the arguments.
7093
7094     """
7095     self.iallocator = getattr(self.op, "iallocator", None)
7096     self.target_node = getattr(self.op, "target_node", None)
7097
7098   def ExpandNames(self):
7099     self._ExpandAndLockInstance()
7100
7101     if self.op.target_node is not None:
7102       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7103
7104     self.needed_locks[locking.LEVEL_NODE] = []
7105     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7106
7107     ignore_consistency = self.op.ignore_consistency
7108     shutdown_timeout = self.op.shutdown_timeout
7109     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7110                                        cleanup=False,
7111                                        failover=True,
7112                                        ignore_consistency=ignore_consistency,
7113                                        shutdown_timeout=shutdown_timeout)
7114     self.tasklets = [self._migrater]
7115
7116   def DeclareLocks(self, level):
7117     if level == locking.LEVEL_NODE:
7118       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7119       if instance.disk_template in constants.DTS_EXT_MIRROR:
7120         if self.op.target_node is None:
7121           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7122         else:
7123           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7124                                                    self.op.target_node]
7125         del self.recalculate_locks[locking.LEVEL_NODE]
7126       else:
7127         self._LockInstancesNodes()
7128
7129   def BuildHooksEnv(self):
7130     """Build hooks env.
7131
7132     This runs on master, primary and secondary nodes of the instance.
7133
7134     """
7135     instance = self._migrater.instance
7136     source_node = instance.primary_node
7137     target_node = self.op.target_node
7138     env = {
7139       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7140       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7141       "OLD_PRIMARY": source_node,
7142       "NEW_PRIMARY": target_node,
7143       }
7144
7145     if instance.disk_template in constants.DTS_INT_MIRROR:
7146       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7147       env["NEW_SECONDARY"] = source_node
7148     else:
7149       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7150
7151     env.update(_BuildInstanceHookEnvByObject(self, instance))
7152
7153     return env
7154
7155   def BuildHooksNodes(self):
7156     """Build hooks nodes.
7157
7158     """
7159     instance = self._migrater.instance
7160     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7161     return (nl, nl + [instance.primary_node])
7162
7163
7164 class LUInstanceMigrate(LogicalUnit):
7165   """Migrate an instance.
7166
7167   This is migration without shutting down, compared to the failover,
7168   which is done with shutdown.
7169
7170   """
7171   HPATH = "instance-migrate"
7172   HTYPE = constants.HTYPE_INSTANCE
7173   REQ_BGL = False
7174
7175   def ExpandNames(self):
7176     self._ExpandAndLockInstance()
7177
7178     if self.op.target_node is not None:
7179       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7180
7181     self.needed_locks[locking.LEVEL_NODE] = []
7182     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7183
7184     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7185                                        cleanup=self.op.cleanup,
7186                                        failover=False,
7187                                        fallback=self.op.allow_failover)
7188     self.tasklets = [self._migrater]
7189
7190   def DeclareLocks(self, level):
7191     if level == locking.LEVEL_NODE:
7192       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7193       if instance.disk_template in constants.DTS_EXT_MIRROR:
7194         if self.op.target_node is None:
7195           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7196         else:
7197           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7198                                                    self.op.target_node]
7199         del self.recalculate_locks[locking.LEVEL_NODE]
7200       else:
7201         self._LockInstancesNodes()
7202
7203   def BuildHooksEnv(self):
7204     """Build hooks env.
7205
7206     This runs on master, primary and secondary nodes of the instance.
7207
7208     """
7209     instance = self._migrater.instance
7210     source_node = instance.primary_node
7211     target_node = self.op.target_node
7212     env = _BuildInstanceHookEnvByObject(self, instance)
7213     env.update({
7214       "MIGRATE_LIVE": self._migrater.live,
7215       "MIGRATE_CLEANUP": self.op.cleanup,
7216       "OLD_PRIMARY": source_node,
7217       "NEW_PRIMARY": target_node,
7218       })
7219
7220     if instance.disk_template in constants.DTS_INT_MIRROR:
7221       env["OLD_SECONDARY"] = target_node
7222       env["NEW_SECONDARY"] = source_node
7223     else:
7224       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7225
7226     return env
7227
7228   def BuildHooksNodes(self):
7229     """Build hooks nodes.
7230
7231     """
7232     instance = self._migrater.instance
7233     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7234     return (nl, nl + [instance.primary_node])
7235
7236
7237 class LUInstanceMove(LogicalUnit):
7238   """Move an instance by data-copying.
7239
7240   """
7241   HPATH = "instance-move"
7242   HTYPE = constants.HTYPE_INSTANCE
7243   REQ_BGL = False
7244
7245   def ExpandNames(self):
7246     self._ExpandAndLockInstance()
7247     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7248     self.op.target_node = target_node
7249     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7250     self.needed_locks[locking.LEVEL_NODE_RES] = []
7251     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7252
7253   def DeclareLocks(self, level):
7254     if level == locking.LEVEL_NODE:
7255       self._LockInstancesNodes(primary_only=True)
7256     elif level == locking.LEVEL_NODE_RES:
7257       # Copy node locks
7258       self.needed_locks[locking.LEVEL_NODE_RES] = \
7259         self.needed_locks[locking.LEVEL_NODE][:]
7260
7261   def BuildHooksEnv(self):
7262     """Build hooks env.
7263
7264     This runs on master, primary and secondary nodes of the instance.
7265
7266     """
7267     env = {
7268       "TARGET_NODE": self.op.target_node,
7269       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7270       }
7271     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7272     return env
7273
7274   def BuildHooksNodes(self):
7275     """Build hooks nodes.
7276
7277     """
7278     nl = [
7279       self.cfg.GetMasterNode(),
7280       self.instance.primary_node,
7281       self.op.target_node,
7282       ]
7283     return (nl, nl)
7284
7285   def CheckPrereq(self):
7286     """Check prerequisites.
7287
7288     This checks that the instance is in the cluster.
7289
7290     """
7291     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7292     assert self.instance is not None, \
7293       "Cannot retrieve locked instance %s" % self.op.instance_name
7294
7295     node = self.cfg.GetNodeInfo(self.op.target_node)
7296     assert node is not None, \
7297       "Cannot retrieve locked node %s" % self.op.target_node
7298
7299     self.target_node = target_node = node.name
7300
7301     if target_node == instance.primary_node:
7302       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7303                                  (instance.name, target_node),
7304                                  errors.ECODE_STATE)
7305
7306     bep = self.cfg.GetClusterInfo().FillBE(instance)
7307
7308     for idx, dsk in enumerate(instance.disks):
7309       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7310         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7311                                    " cannot copy" % idx, errors.ECODE_STATE)
7312
7313     _CheckNodeOnline(self, target_node)
7314     _CheckNodeNotDrained(self, target_node)
7315     _CheckNodeVmCapable(self, target_node)
7316
7317     if instance.admin_state == constants.ADMINST_UP:
7318       # check memory requirements on the secondary node
7319       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7320                            instance.name, bep[constants.BE_MAXMEM],
7321                            instance.hypervisor)
7322     else:
7323       self.LogInfo("Not checking memory on the secondary node as"
7324                    " instance will not be started")
7325
7326     # check bridge existance
7327     _CheckInstanceBridgesExist(self, instance, node=target_node)
7328
7329   def Exec(self, feedback_fn):
7330     """Move an instance.
7331
7332     The move is done by shutting it down on its present node, copying
7333     the data over (slow) and starting it on the new node.
7334
7335     """
7336     instance = self.instance
7337
7338     source_node = instance.primary_node
7339     target_node = self.target_node
7340
7341     self.LogInfo("Shutting down instance %s on source node %s",
7342                  instance.name, source_node)
7343
7344     assert (self.owned_locks(locking.LEVEL_NODE) ==
7345             self.owned_locks(locking.LEVEL_NODE_RES))
7346
7347     result = self.rpc.call_instance_shutdown(source_node, instance,
7348                                              self.op.shutdown_timeout)
7349     msg = result.fail_msg
7350     if msg:
7351       if self.op.ignore_consistency:
7352         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7353                              " Proceeding anyway. Please make sure node"
7354                              " %s is down. Error details: %s",
7355                              instance.name, source_node, source_node, msg)
7356       else:
7357         raise errors.OpExecError("Could not shutdown instance %s on"
7358                                  " node %s: %s" %
7359                                  (instance.name, source_node, msg))
7360
7361     # create the target disks
7362     try:
7363       _CreateDisks(self, instance, target_node=target_node)
7364     except errors.OpExecError:
7365       self.LogWarning("Device creation failed, reverting...")
7366       try:
7367         _RemoveDisks(self, instance, target_node=target_node)
7368       finally:
7369         self.cfg.ReleaseDRBDMinors(instance.name)
7370         raise
7371
7372     cluster_name = self.cfg.GetClusterInfo().cluster_name
7373
7374     errs = []
7375     # activate, get path, copy the data over
7376     for idx, disk in enumerate(instance.disks):
7377       self.LogInfo("Copying data for disk %d", idx)
7378       result = self.rpc.call_blockdev_assemble(target_node, disk,
7379                                                instance.name, True, idx)
7380       if result.fail_msg:
7381         self.LogWarning("Can't assemble newly created disk %d: %s",
7382                         idx, result.fail_msg)
7383         errs.append(result.fail_msg)
7384         break
7385       dev_path = result.payload
7386       result = self.rpc.call_blockdev_export(source_node, disk,
7387                                              target_node, dev_path,
7388                                              cluster_name)
7389       if result.fail_msg:
7390         self.LogWarning("Can't copy data over for disk %d: %s",
7391                         idx, result.fail_msg)
7392         errs.append(result.fail_msg)
7393         break
7394
7395     if errs:
7396       self.LogWarning("Some disks failed to copy, aborting")
7397       try:
7398         _RemoveDisks(self, instance, target_node=target_node)
7399       finally:
7400         self.cfg.ReleaseDRBDMinors(instance.name)
7401         raise errors.OpExecError("Errors during disk copy: %s" %
7402                                  (",".join(errs),))
7403
7404     instance.primary_node = target_node
7405     self.cfg.Update(instance, feedback_fn)
7406
7407     self.LogInfo("Removing the disks on the original node")
7408     _RemoveDisks(self, instance, target_node=source_node)
7409
7410     # Only start the instance if it's marked as up
7411     if instance.admin_state == constants.ADMINST_UP:
7412       self.LogInfo("Starting instance %s on node %s",
7413                    instance.name, target_node)
7414
7415       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7416                                            ignore_secondaries=True)
7417       if not disks_ok:
7418         _ShutdownInstanceDisks(self, instance)
7419         raise errors.OpExecError("Can't activate the instance's disks")
7420
7421       result = self.rpc.call_instance_start(target_node,
7422                                             (instance, None, None), False)
7423       msg = result.fail_msg
7424       if msg:
7425         _ShutdownInstanceDisks(self, instance)
7426         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7427                                  (instance.name, target_node, msg))
7428
7429
7430 class LUNodeMigrate(LogicalUnit):
7431   """Migrate all instances from a node.
7432
7433   """
7434   HPATH = "node-migrate"
7435   HTYPE = constants.HTYPE_NODE
7436   REQ_BGL = False
7437
7438   def CheckArguments(self):
7439     pass
7440
7441   def ExpandNames(self):
7442     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7443
7444     self.share_locks = _ShareAll()
7445     self.needed_locks = {
7446       locking.LEVEL_NODE: [self.op.node_name],
7447       }
7448
7449   def BuildHooksEnv(self):
7450     """Build hooks env.
7451
7452     This runs on the master, the primary and all the secondaries.
7453
7454     """
7455     return {
7456       "NODE_NAME": self.op.node_name,
7457       }
7458
7459   def BuildHooksNodes(self):
7460     """Build hooks nodes.
7461
7462     """
7463     nl = [self.cfg.GetMasterNode()]
7464     return (nl, nl)
7465
7466   def CheckPrereq(self):
7467     pass
7468
7469   def Exec(self, feedback_fn):
7470     # Prepare jobs for migration instances
7471     jobs = [
7472       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7473                                  mode=self.op.mode,
7474                                  live=self.op.live,
7475                                  iallocator=self.op.iallocator,
7476                                  target_node=self.op.target_node)]
7477       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7478       ]
7479
7480     # TODO: Run iallocator in this opcode and pass correct placement options to
7481     # OpInstanceMigrate. Since other jobs can modify the cluster between
7482     # running the iallocator and the actual migration, a good consistency model
7483     # will have to be found.
7484
7485     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7486             frozenset([self.op.node_name]))
7487
7488     return ResultWithJobs(jobs)
7489
7490
7491 class TLMigrateInstance(Tasklet):
7492   """Tasklet class for instance migration.
7493
7494   @type live: boolean
7495   @ivar live: whether the migration will be done live or non-live;
7496       this variable is initalized only after CheckPrereq has run
7497   @type cleanup: boolean
7498   @ivar cleanup: Wheater we cleanup from a failed migration
7499   @type iallocator: string
7500   @ivar iallocator: The iallocator used to determine target_node
7501   @type target_node: string
7502   @ivar target_node: If given, the target_node to reallocate the instance to
7503   @type failover: boolean
7504   @ivar failover: Whether operation results in failover or migration
7505   @type fallback: boolean
7506   @ivar fallback: Whether fallback to failover is allowed if migration not
7507                   possible
7508   @type ignore_consistency: boolean
7509   @ivar ignore_consistency: Wheter we should ignore consistency between source
7510                             and target node
7511   @type shutdown_timeout: int
7512   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7513
7514   """
7515
7516   # Constants
7517   _MIGRATION_POLL_INTERVAL = 1      # seconds
7518   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7519
7520   def __init__(self, lu, instance_name, cleanup=False,
7521                failover=False, fallback=False,
7522                ignore_consistency=False,
7523                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7524     """Initializes this class.
7525
7526     """
7527     Tasklet.__init__(self, lu)
7528
7529     # Parameters
7530     self.instance_name = instance_name
7531     self.cleanup = cleanup
7532     self.live = False # will be overridden later
7533     self.failover = failover
7534     self.fallback = fallback
7535     self.ignore_consistency = ignore_consistency
7536     self.shutdown_timeout = shutdown_timeout
7537
7538   def CheckPrereq(self):
7539     """Check prerequisites.
7540
7541     This checks that the instance is in the cluster.
7542
7543     """
7544     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7545     instance = self.cfg.GetInstanceInfo(instance_name)
7546     assert instance is not None
7547     self.instance = instance
7548
7549     if (not self.cleanup and
7550         not instance.admin_state == constants.ADMINST_UP and
7551         not self.failover and self.fallback):
7552       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7553                       " switching to failover")
7554       self.failover = True
7555
7556     if instance.disk_template not in constants.DTS_MIRRORED:
7557       if self.failover:
7558         text = "failovers"
7559       else:
7560         text = "migrations"
7561       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7562                                  " %s" % (instance.disk_template, text),
7563                                  errors.ECODE_STATE)
7564
7565     if instance.disk_template in constants.DTS_EXT_MIRROR:
7566       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7567
7568       if self.lu.op.iallocator:
7569         self._RunAllocator()
7570       else:
7571         # We set set self.target_node as it is required by
7572         # BuildHooksEnv
7573         self.target_node = self.lu.op.target_node
7574
7575       # self.target_node is already populated, either directly or by the
7576       # iallocator run
7577       target_node = self.target_node
7578       if self.target_node == instance.primary_node:
7579         raise errors.OpPrereqError("Cannot migrate instance %s"
7580                                    " to its primary (%s)" %
7581                                    (instance.name, instance.primary_node))
7582
7583       if len(self.lu.tasklets) == 1:
7584         # It is safe to release locks only when we're the only tasklet
7585         # in the LU
7586         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7587                       keep=[instance.primary_node, self.target_node])
7588
7589     else:
7590       secondary_nodes = instance.secondary_nodes
7591       if not secondary_nodes:
7592         raise errors.ConfigurationError("No secondary node but using"
7593                                         " %s disk template" %
7594                                         instance.disk_template)
7595       target_node = secondary_nodes[0]
7596       if self.lu.op.iallocator or (self.lu.op.target_node and
7597                                    self.lu.op.target_node != target_node):
7598         if self.failover:
7599           text = "failed over"
7600         else:
7601           text = "migrated"
7602         raise errors.OpPrereqError("Instances with disk template %s cannot"
7603                                    " be %s to arbitrary nodes"
7604                                    " (neither an iallocator nor a target"
7605                                    " node can be passed)" %
7606                                    (instance.disk_template, text),
7607                                    errors.ECODE_INVAL)
7608
7609     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7610
7611     # check memory requirements on the secondary node
7612     if not self.failover or instance.admin_state == constants.ADMINST_UP:
7613       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7614                            instance.name, i_be[constants.BE_MAXMEM],
7615                            instance.hypervisor)
7616     else:
7617       self.lu.LogInfo("Not checking memory on the secondary node as"
7618                       " instance will not be started")
7619
7620     # check if failover must be forced instead of migration
7621     if (not self.cleanup and not self.failover and
7622         i_be[constants.BE_ALWAYS_FAILOVER]):
7623       if self.fallback:
7624         self.lu.LogInfo("Instance configured to always failover; fallback"
7625                         " to failover")
7626         self.failover = True
7627       else:
7628         raise errors.OpPrereqError("This instance has been configured to"
7629                                    " always failover, please allow failover",
7630                                    errors.ECODE_STATE)
7631
7632     # check bridge existance
7633     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7634
7635     if not self.cleanup:
7636       _CheckNodeNotDrained(self.lu, target_node)
7637       if not self.failover:
7638         result = self.rpc.call_instance_migratable(instance.primary_node,
7639                                                    instance)
7640         if result.fail_msg and self.fallback:
7641           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7642                           " failover")
7643           self.failover = True
7644         else:
7645           result.Raise("Can't migrate, please use failover",
7646                        prereq=True, ecode=errors.ECODE_STATE)
7647
7648     assert not (self.failover and self.cleanup)
7649
7650     if not self.failover:
7651       if self.lu.op.live is not None and self.lu.op.mode is not None:
7652         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7653                                    " parameters are accepted",
7654                                    errors.ECODE_INVAL)
7655       if self.lu.op.live is not None:
7656         if self.lu.op.live:
7657           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7658         else:
7659           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7660         # reset the 'live' parameter to None so that repeated
7661         # invocations of CheckPrereq do not raise an exception
7662         self.lu.op.live = None
7663       elif self.lu.op.mode is None:
7664         # read the default value from the hypervisor
7665         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7666                                                 skip_globals=False)
7667         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7668
7669       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7670     else:
7671       # Failover is never live
7672       self.live = False
7673
7674   def _RunAllocator(self):
7675     """Run the allocator based on input opcode.
7676
7677     """
7678     ial = IAllocator(self.cfg, self.rpc,
7679                      mode=constants.IALLOCATOR_MODE_RELOC,
7680                      name=self.instance_name,
7681                      # TODO See why hail breaks with a single node below
7682                      relocate_from=[self.instance.primary_node,
7683                                     self.instance.primary_node],
7684                      )
7685
7686     ial.Run(self.lu.op.iallocator)
7687
7688     if not ial.success:
7689       raise errors.OpPrereqError("Can't compute nodes using"
7690                                  " iallocator '%s': %s" %
7691                                  (self.lu.op.iallocator, ial.info),
7692                                  errors.ECODE_NORES)
7693     if len(ial.result) != ial.required_nodes:
7694       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7695                                  " of nodes (%s), required %s" %
7696                                  (self.lu.op.iallocator, len(ial.result),
7697                                   ial.required_nodes), errors.ECODE_FAULT)
7698     self.target_node = ial.result[0]
7699     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7700                  self.instance_name, self.lu.op.iallocator,
7701                  utils.CommaJoin(ial.result))
7702
7703   def _WaitUntilSync(self):
7704     """Poll with custom rpc for disk sync.
7705
7706     This uses our own step-based rpc call.
7707
7708     """
7709     self.feedback_fn("* wait until resync is done")
7710     all_done = False
7711     while not all_done:
7712       all_done = True
7713       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7714                                             self.nodes_ip,
7715                                             self.instance.disks)
7716       min_percent = 100
7717       for node, nres in result.items():
7718         nres.Raise("Cannot resync disks on node %s" % node)
7719         node_done, node_percent = nres.payload
7720         all_done = all_done and node_done
7721         if node_percent is not None:
7722           min_percent = min(min_percent, node_percent)
7723       if not all_done:
7724         if min_percent < 100:
7725           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7726         time.sleep(2)
7727
7728   def _EnsureSecondary(self, node):
7729     """Demote a node to secondary.
7730
7731     """
7732     self.feedback_fn("* switching node %s to secondary mode" % node)
7733
7734     for dev in self.instance.disks:
7735       self.cfg.SetDiskID(dev, node)
7736
7737     result = self.rpc.call_blockdev_close(node, self.instance.name,
7738                                           self.instance.disks)
7739     result.Raise("Cannot change disk to secondary on node %s" % node)
7740
7741   def _GoStandalone(self):
7742     """Disconnect from the network.
7743
7744     """
7745     self.feedback_fn("* changing into standalone mode")
7746     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7747                                                self.instance.disks)
7748     for node, nres in result.items():
7749       nres.Raise("Cannot disconnect disks node %s" % node)
7750
7751   def _GoReconnect(self, multimaster):
7752     """Reconnect to the network.
7753
7754     """
7755     if multimaster:
7756       msg = "dual-master"
7757     else:
7758       msg = "single-master"
7759     self.feedback_fn("* changing disks into %s mode" % msg)
7760     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7761                                            self.instance.disks,
7762                                            self.instance.name, multimaster)
7763     for node, nres in result.items():
7764       nres.Raise("Cannot change disks config on node %s" % node)
7765
7766   def _ExecCleanup(self):
7767     """Try to cleanup after a failed migration.
7768
7769     The cleanup is done by:
7770       - check that the instance is running only on one node
7771         (and update the config if needed)
7772       - change disks on its secondary node to secondary
7773       - wait until disks are fully synchronized
7774       - disconnect from the network
7775       - change disks into single-master mode
7776       - wait again until disks are fully synchronized
7777
7778     """
7779     instance = self.instance
7780     target_node = self.target_node
7781     source_node = self.source_node
7782
7783     # check running on only one node
7784     self.feedback_fn("* checking where the instance actually runs"
7785                      " (if this hangs, the hypervisor might be in"
7786                      " a bad state)")
7787     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7788     for node, result in ins_l.items():
7789       result.Raise("Can't contact node %s" % node)
7790
7791     runningon_source = instance.name in ins_l[source_node].payload
7792     runningon_target = instance.name in ins_l[target_node].payload
7793
7794     if runningon_source and runningon_target:
7795       raise errors.OpExecError("Instance seems to be running on two nodes,"
7796                                " or the hypervisor is confused; you will have"
7797                                " to ensure manually that it runs only on one"
7798                                " and restart this operation")
7799
7800     if not (runningon_source or runningon_target):
7801       raise errors.OpExecError("Instance does not seem to be running at all;"
7802                                " in this case it's safer to repair by"
7803                                " running 'gnt-instance stop' to ensure disk"
7804                                " shutdown, and then restarting it")
7805
7806     if runningon_target:
7807       # the migration has actually succeeded, we need to update the config
7808       self.feedback_fn("* instance running on secondary node (%s),"
7809                        " updating config" % target_node)
7810       instance.primary_node = target_node
7811       self.cfg.Update(instance, self.feedback_fn)
7812       demoted_node = source_node
7813     else:
7814       self.feedback_fn("* instance confirmed to be running on its"
7815                        " primary node (%s)" % source_node)
7816       demoted_node = target_node
7817
7818     if instance.disk_template in constants.DTS_INT_MIRROR:
7819       self._EnsureSecondary(demoted_node)
7820       try:
7821         self._WaitUntilSync()
7822       except errors.OpExecError:
7823         # we ignore here errors, since if the device is standalone, it
7824         # won't be able to sync
7825         pass
7826       self._GoStandalone()
7827       self._GoReconnect(False)
7828       self._WaitUntilSync()
7829
7830     self.feedback_fn("* done")
7831
7832   def _RevertDiskStatus(self):
7833     """Try to revert the disk status after a failed migration.
7834
7835     """
7836     target_node = self.target_node
7837     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7838       return
7839
7840     try:
7841       self._EnsureSecondary(target_node)
7842       self._GoStandalone()
7843       self._GoReconnect(False)
7844       self._WaitUntilSync()
7845     except errors.OpExecError, err:
7846       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7847                          " please try to recover the instance manually;"
7848                          " error '%s'" % str(err))
7849
7850   def _AbortMigration(self):
7851     """Call the hypervisor code to abort a started migration.
7852
7853     """
7854     instance = self.instance
7855     target_node = self.target_node
7856     source_node = self.source_node
7857     migration_info = self.migration_info
7858
7859     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7860                                                                  instance,
7861                                                                  migration_info,
7862                                                                  False)
7863     abort_msg = abort_result.fail_msg
7864     if abort_msg:
7865       logging.error("Aborting migration failed on target node %s: %s",
7866                     target_node, abort_msg)
7867       # Don't raise an exception here, as we stil have to try to revert the
7868       # disk status, even if this step failed.
7869
7870     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7871         instance, False, self.live)
7872     abort_msg = abort_result.fail_msg
7873     if abort_msg:
7874       logging.error("Aborting migration failed on source node %s: %s",
7875                     source_node, abort_msg)
7876
7877   def _ExecMigration(self):
7878     """Migrate an instance.
7879
7880     The migrate is done by:
7881       - change the disks into dual-master mode
7882       - wait until disks are fully synchronized again
7883       - migrate the instance
7884       - change disks on the new secondary node (the old primary) to secondary
7885       - wait until disks are fully synchronized
7886       - change disks into single-master mode
7887
7888     """
7889     instance = self.instance
7890     target_node = self.target_node
7891     source_node = self.source_node
7892
7893     # Check for hypervisor version mismatch and warn the user.
7894     nodeinfo = self.rpc.call_node_info([source_node, target_node],
7895                                        None, [self.instance.hypervisor])
7896     for ninfo in nodeinfo.values():
7897       ninfo.Raise("Unable to retrieve node information from node '%s'" %
7898                   ninfo.node)
7899     (_, _, (src_info, )) = nodeinfo[source_node].payload
7900     (_, _, (dst_info, )) = nodeinfo[target_node].payload
7901
7902     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7903         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7904       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7905       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7906       if src_version != dst_version:
7907         self.feedback_fn("* warning: hypervisor version mismatch between"
7908                          " source (%s) and target (%s) node" %
7909                          (src_version, dst_version))
7910
7911     self.feedback_fn("* checking disk consistency between source and target")
7912     for dev in instance.disks:
7913       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7914         raise errors.OpExecError("Disk %s is degraded or not fully"
7915                                  " synchronized on target node,"
7916                                  " aborting migration" % dev.iv_name)
7917
7918     # First get the migration information from the remote node
7919     result = self.rpc.call_migration_info(source_node, instance)
7920     msg = result.fail_msg
7921     if msg:
7922       log_err = ("Failed fetching source migration information from %s: %s" %
7923                  (source_node, msg))
7924       logging.error(log_err)
7925       raise errors.OpExecError(log_err)
7926
7927     self.migration_info = migration_info = result.payload
7928
7929     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7930       # Then switch the disks to master/master mode
7931       self._EnsureSecondary(target_node)
7932       self._GoStandalone()
7933       self._GoReconnect(True)
7934       self._WaitUntilSync()
7935
7936     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7937     result = self.rpc.call_accept_instance(target_node,
7938                                            instance,
7939                                            migration_info,
7940                                            self.nodes_ip[target_node])
7941
7942     msg = result.fail_msg
7943     if msg:
7944       logging.error("Instance pre-migration failed, trying to revert"
7945                     " disk status: %s", msg)
7946       self.feedback_fn("Pre-migration failed, aborting")
7947       self._AbortMigration()
7948       self._RevertDiskStatus()
7949       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7950                                (instance.name, msg))
7951
7952     self.feedback_fn("* migrating instance to %s" % target_node)
7953     result = self.rpc.call_instance_migrate(source_node, instance,
7954                                             self.nodes_ip[target_node],
7955                                             self.live)
7956     msg = result.fail_msg
7957     if msg:
7958       logging.error("Instance migration failed, trying to revert"
7959                     " disk status: %s", msg)
7960       self.feedback_fn("Migration failed, aborting")
7961       self._AbortMigration()
7962       self._RevertDiskStatus()
7963       raise errors.OpExecError("Could not migrate instance %s: %s" %
7964                                (instance.name, msg))
7965
7966     self.feedback_fn("* starting memory transfer")
7967     last_feedback = time.time()
7968     while True:
7969       result = self.rpc.call_instance_get_migration_status(source_node,
7970                                                            instance)
7971       msg = result.fail_msg
7972       ms = result.payload   # MigrationStatus instance
7973       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7974         logging.error("Instance migration failed, trying to revert"
7975                       " disk status: %s", msg)
7976         self.feedback_fn("Migration failed, aborting")
7977         self._AbortMigration()
7978         self._RevertDiskStatus()
7979         raise errors.OpExecError("Could not migrate instance %s: %s" %
7980                                  (instance.name, msg))
7981
7982       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7983         self.feedback_fn("* memory transfer complete")
7984         break
7985
7986       if (utils.TimeoutExpired(last_feedback,
7987                                self._MIGRATION_FEEDBACK_INTERVAL) and
7988           ms.transferred_ram is not None):
7989         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7990         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7991         last_feedback = time.time()
7992
7993       time.sleep(self._MIGRATION_POLL_INTERVAL)
7994
7995     result = self.rpc.call_instance_finalize_migration_src(source_node,
7996                                                            instance,
7997                                                            True,
7998                                                            self.live)
7999     msg = result.fail_msg
8000     if msg:
8001       logging.error("Instance migration succeeded, but finalization failed"
8002                     " on the source node: %s", msg)
8003       raise errors.OpExecError("Could not finalize instance migration: %s" %
8004                                msg)
8005
8006     instance.primary_node = target_node
8007
8008     # distribute new instance config to the other nodes
8009     self.cfg.Update(instance, self.feedback_fn)
8010
8011     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8012                                                            instance,
8013                                                            migration_info,
8014                                                            True)
8015     msg = result.fail_msg
8016     if msg:
8017       logging.error("Instance migration succeeded, but finalization failed"
8018                     " on the target node: %s", msg)
8019       raise errors.OpExecError("Could not finalize instance migration: %s" %
8020                                msg)
8021
8022     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8023       self._EnsureSecondary(source_node)
8024       self._WaitUntilSync()
8025       self._GoStandalone()
8026       self._GoReconnect(False)
8027       self._WaitUntilSync()
8028
8029     self.feedback_fn("* done")
8030
8031   def _ExecFailover(self):
8032     """Failover an instance.
8033
8034     The failover is done by shutting it down on its present node and
8035     starting it on the secondary.
8036
8037     """
8038     instance = self.instance
8039     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8040
8041     source_node = instance.primary_node
8042     target_node = self.target_node
8043
8044     if instance.admin_state == constants.ADMINST_UP:
8045       self.feedback_fn("* checking disk consistency between source and target")
8046       for dev in instance.disks:
8047         # for drbd, these are drbd over lvm
8048         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8049           if primary_node.offline:
8050             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8051                              " target node %s" %
8052                              (primary_node.name, dev.iv_name, target_node))
8053           elif not self.ignore_consistency:
8054             raise errors.OpExecError("Disk %s is degraded on target node,"
8055                                      " aborting failover" % dev.iv_name)
8056     else:
8057       self.feedback_fn("* not checking disk consistency as instance is not"
8058                        " running")
8059
8060     self.feedback_fn("* shutting down instance on source node")
8061     logging.info("Shutting down instance %s on node %s",
8062                  instance.name, source_node)
8063
8064     result = self.rpc.call_instance_shutdown(source_node, instance,
8065                                              self.shutdown_timeout)
8066     msg = result.fail_msg
8067     if msg:
8068       if self.ignore_consistency or primary_node.offline:
8069         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8070                            " proceeding anyway; please make sure node"
8071                            " %s is down; error details: %s",
8072                            instance.name, source_node, source_node, msg)
8073       else:
8074         raise errors.OpExecError("Could not shutdown instance %s on"
8075                                  " node %s: %s" %
8076                                  (instance.name, source_node, msg))
8077
8078     self.feedback_fn("* deactivating the instance's disks on source node")
8079     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8080       raise errors.OpExecError("Can't shut down the instance's disks")
8081
8082     instance.primary_node = target_node
8083     # distribute new instance config to the other nodes
8084     self.cfg.Update(instance, self.feedback_fn)
8085
8086     # Only start the instance if it's marked as up
8087     if instance.admin_state == constants.ADMINST_UP:
8088       self.feedback_fn("* activating the instance's disks on target node %s" %
8089                        target_node)
8090       logging.info("Starting instance %s on node %s",
8091                    instance.name, target_node)
8092
8093       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8094                                            ignore_secondaries=True)
8095       if not disks_ok:
8096         _ShutdownInstanceDisks(self.lu, instance)
8097         raise errors.OpExecError("Can't activate the instance's disks")
8098
8099       self.feedback_fn("* starting the instance on the target node %s" %
8100                        target_node)
8101       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8102                                             False)
8103       msg = result.fail_msg
8104       if msg:
8105         _ShutdownInstanceDisks(self.lu, instance)
8106         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8107                                  (instance.name, target_node, msg))
8108
8109   def Exec(self, feedback_fn):
8110     """Perform the migration.
8111
8112     """
8113     self.feedback_fn = feedback_fn
8114     self.source_node = self.instance.primary_node
8115
8116     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8117     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8118       self.target_node = self.instance.secondary_nodes[0]
8119       # Otherwise self.target_node has been populated either
8120       # directly, or through an iallocator.
8121
8122     self.all_nodes = [self.source_node, self.target_node]
8123     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8124                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8125
8126     if self.failover:
8127       feedback_fn("Failover instance %s" % self.instance.name)
8128       self._ExecFailover()
8129     else:
8130       feedback_fn("Migrating instance %s" % self.instance.name)
8131
8132       if self.cleanup:
8133         return self._ExecCleanup()
8134       else:
8135         return self._ExecMigration()
8136
8137
8138 def _CreateBlockDev(lu, node, instance, device, force_create,
8139                     info, force_open):
8140   """Create a tree of block devices on a given node.
8141
8142   If this device type has to be created on secondaries, create it and
8143   all its children.
8144
8145   If not, just recurse to children keeping the same 'force' value.
8146
8147   @param lu: the lu on whose behalf we execute
8148   @param node: the node on which to create the device
8149   @type instance: L{objects.Instance}
8150   @param instance: the instance which owns the device
8151   @type device: L{objects.Disk}
8152   @param device: the device to create
8153   @type force_create: boolean
8154   @param force_create: whether to force creation of this device; this
8155       will be change to True whenever we find a device which has
8156       CreateOnSecondary() attribute
8157   @param info: the extra 'metadata' we should attach to the device
8158       (this will be represented as a LVM tag)
8159   @type force_open: boolean
8160   @param force_open: this parameter will be passes to the
8161       L{backend.BlockdevCreate} function where it specifies
8162       whether we run on primary or not, and it affects both
8163       the child assembly and the device own Open() execution
8164
8165   """
8166   if device.CreateOnSecondary():
8167     force_create = True
8168
8169   if device.children:
8170     for child in device.children:
8171       _CreateBlockDev(lu, node, instance, child, force_create,
8172                       info, force_open)
8173
8174   if not force_create:
8175     return
8176
8177   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8178
8179
8180 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8181   """Create a single block device on a given node.
8182
8183   This will not recurse over children of the device, so they must be
8184   created in advance.
8185
8186   @param lu: the lu on whose behalf we execute
8187   @param node: the node on which to create the device
8188   @type instance: L{objects.Instance}
8189   @param instance: the instance which owns the device
8190   @type device: L{objects.Disk}
8191   @param device: the device to create
8192   @param info: the extra 'metadata' we should attach to the device
8193       (this will be represented as a LVM tag)
8194   @type force_open: boolean
8195   @param force_open: this parameter will be passes to the
8196       L{backend.BlockdevCreate} function where it specifies
8197       whether we run on primary or not, and it affects both
8198       the child assembly and the device own Open() execution
8199
8200   """
8201   lu.cfg.SetDiskID(device, node)
8202   result = lu.rpc.call_blockdev_create(node, device, device.size,
8203                                        instance.name, force_open, info)
8204   result.Raise("Can't create block device %s on"
8205                " node %s for instance %s" % (device, node, instance.name))
8206   if device.physical_id is None:
8207     device.physical_id = result.payload
8208
8209
8210 def _GenerateUniqueNames(lu, exts):
8211   """Generate a suitable LV name.
8212
8213   This will generate a logical volume name for the given instance.
8214
8215   """
8216   results = []
8217   for val in exts:
8218     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8219     results.append("%s%s" % (new_id, val))
8220   return results
8221
8222
8223 def _ComputeLDParams(disk_template, disk_params):
8224   """Computes Logical Disk parameters from Disk Template parameters.
8225
8226   @type disk_template: string
8227   @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8228   @type disk_params: dict
8229   @param disk_params: disk template parameters; dict(template_name -> parameters
8230   @rtype: list(dict)
8231   @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8232     contains the LD parameters of the node. The tree is flattened in-order.
8233
8234   """
8235   if disk_template not in constants.DISK_TEMPLATES:
8236     raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8237
8238   result = list()
8239   dt_params = disk_params[disk_template]
8240   if disk_template == constants.DT_DRBD8:
8241     drbd_params = {
8242       constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8243       constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8244       constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8245       constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8246       constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8247       constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8248       constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8249       constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8250       constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8251       constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8252       constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8253       constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8254       }
8255
8256     drbd_params = \
8257       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8258                        drbd_params)
8259
8260     result.append(drbd_params)
8261
8262     # data LV
8263     data_params = {
8264       constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8265       }
8266     data_params = \
8267       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8268                        data_params)
8269     result.append(data_params)
8270
8271     # metadata LV
8272     meta_params = {
8273       constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8274       }
8275     meta_params = \
8276       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8277                        meta_params)
8278     result.append(meta_params)
8279
8280   elif (disk_template == constants.DT_FILE or
8281         disk_template == constants.DT_SHARED_FILE):
8282     result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8283
8284   elif disk_template == constants.DT_PLAIN:
8285     params = {
8286       constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8287       }
8288     params = \
8289       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8290                        params)
8291     result.append(params)
8292
8293   elif disk_template == constants.DT_BLOCK:
8294     result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8295
8296   return result
8297
8298
8299 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8300                          iv_name, p_minor, s_minor, drbd_params, data_params,
8301                          meta_params):
8302   """Generate a drbd8 device complete with its children.
8303
8304   """
8305   assert len(vgnames) == len(names) == 2
8306   port = lu.cfg.AllocatePort()
8307   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8308
8309   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8310                           logical_id=(vgnames[0], names[0]),
8311                           params=data_params)
8312   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8313                           logical_id=(vgnames[1], names[1]),
8314                           params=meta_params)
8315   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8316                           logical_id=(primary, secondary, port,
8317                                       p_minor, s_minor,
8318                                       shared_secret),
8319                           children=[dev_data, dev_meta],
8320                           iv_name=iv_name, params=drbd_params)
8321   return drbd_dev
8322
8323
8324 def _GenerateDiskTemplate(lu, template_name,
8325                           instance_name, primary_node,
8326                           secondary_nodes, disk_info,
8327                           file_storage_dir, file_driver,
8328                           base_index, feedback_fn, disk_params):
8329   """Generate the entire disk layout for a given template type.
8330
8331   """
8332   #TODO: compute space requirements
8333
8334   vgname = lu.cfg.GetVGName()
8335   disk_count = len(disk_info)
8336   disks = []
8337   ld_params = _ComputeLDParams(template_name, disk_params)
8338   if template_name == constants.DT_DISKLESS:
8339     pass
8340   elif template_name == constants.DT_PLAIN:
8341     if len(secondary_nodes) != 0:
8342       raise errors.ProgrammerError("Wrong template configuration")
8343
8344     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8345                                       for i in range(disk_count)])
8346     for idx, disk in enumerate(disk_info):
8347       disk_index = idx + base_index
8348       vg = disk.get(constants.IDISK_VG, vgname)
8349       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8350       disk_dev = objects.Disk(dev_type=constants.LD_LV,
8351                               size=disk[constants.IDISK_SIZE],
8352                               logical_id=(vg, names[idx]),
8353                               iv_name="disk/%d" % disk_index,
8354                               mode=disk[constants.IDISK_MODE],
8355                               params=ld_params[0])
8356       disks.append(disk_dev)
8357   elif template_name == constants.DT_DRBD8:
8358     drbd_params, data_params, meta_params = ld_params
8359     if len(secondary_nodes) != 1:
8360       raise errors.ProgrammerError("Wrong template configuration")
8361     remote_node = secondary_nodes[0]
8362     minors = lu.cfg.AllocateDRBDMinor(
8363       [primary_node, remote_node] * len(disk_info), instance_name)
8364
8365     names = []
8366     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8367                                                for i in range(disk_count)]):
8368       names.append(lv_prefix + "_data")
8369       names.append(lv_prefix + "_meta")
8370     for idx, disk in enumerate(disk_info):
8371       disk_index = idx + base_index
8372       drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8373       data_vg = disk.get(constants.IDISK_VG, vgname)
8374       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8375       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8376                                       disk[constants.IDISK_SIZE],
8377                                       [data_vg, meta_vg],
8378                                       names[idx * 2:idx * 2 + 2],
8379                                       "disk/%d" % disk_index,
8380                                       minors[idx * 2], minors[idx * 2 + 1],
8381                                       drbd_params, data_params, meta_params)
8382       disk_dev.mode = disk[constants.IDISK_MODE]
8383       disks.append(disk_dev)
8384   elif template_name == constants.DT_FILE:
8385     if len(secondary_nodes) != 0:
8386       raise errors.ProgrammerError("Wrong template configuration")
8387
8388     opcodes.RequireFileStorage()
8389
8390     for idx, disk in enumerate(disk_info):
8391       disk_index = idx + base_index
8392       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8393                               size=disk[constants.IDISK_SIZE],
8394                               iv_name="disk/%d" % disk_index,
8395                               logical_id=(file_driver,
8396                                           "%s/disk%d" % (file_storage_dir,
8397                                                          disk_index)),
8398                               mode=disk[constants.IDISK_MODE],
8399                               params=ld_params[0])
8400       disks.append(disk_dev)
8401   elif template_name == constants.DT_SHARED_FILE:
8402     if len(secondary_nodes) != 0:
8403       raise errors.ProgrammerError("Wrong template configuration")
8404
8405     opcodes.RequireSharedFileStorage()
8406
8407     for idx, disk in enumerate(disk_info):
8408       disk_index = idx + base_index
8409       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8410                               size=disk[constants.IDISK_SIZE],
8411                               iv_name="disk/%d" % disk_index,
8412                               logical_id=(file_driver,
8413                                           "%s/disk%d" % (file_storage_dir,
8414                                                          disk_index)),
8415                               mode=disk[constants.IDISK_MODE],
8416                               params=ld_params[0])
8417       disks.append(disk_dev)
8418   elif template_name == constants.DT_BLOCK:
8419     if len(secondary_nodes) != 0:
8420       raise errors.ProgrammerError("Wrong template configuration")
8421
8422     for idx, disk in enumerate(disk_info):
8423       disk_index = idx + base_index
8424       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8425                               size=disk[constants.IDISK_SIZE],
8426                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8427                                           disk[constants.IDISK_ADOPT]),
8428                               iv_name="disk/%d" % disk_index,
8429                               mode=disk[constants.IDISK_MODE],
8430                               params=ld_params[0])
8431       disks.append(disk_dev)
8432
8433   else:
8434     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8435   return disks
8436
8437
8438 def _GetInstanceInfoText(instance):
8439   """Compute that text that should be added to the disk's metadata.
8440
8441   """
8442   return "originstname+%s" % instance.name
8443
8444
8445 def _CalcEta(time_taken, written, total_size):
8446   """Calculates the ETA based on size written and total size.
8447
8448   @param time_taken: The time taken so far
8449   @param written: amount written so far
8450   @param total_size: The total size of data to be written
8451   @return: The remaining time in seconds
8452
8453   """
8454   avg_time = time_taken / float(written)
8455   return (total_size - written) * avg_time
8456
8457
8458 def _WipeDisks(lu, instance):
8459   """Wipes instance disks.
8460
8461   @type lu: L{LogicalUnit}
8462   @param lu: the logical unit on whose behalf we execute
8463   @type instance: L{objects.Instance}
8464   @param instance: the instance whose disks we should create
8465   @return: the success of the wipe
8466
8467   """
8468   node = instance.primary_node
8469
8470   for device in instance.disks:
8471     lu.cfg.SetDiskID(device, node)
8472
8473   logging.info("Pause sync of instance %s disks", instance.name)
8474   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8475
8476   for idx, success in enumerate(result.payload):
8477     if not success:
8478       logging.warn("pause-sync of instance %s for disks %d failed",
8479                    instance.name, idx)
8480
8481   try:
8482     for idx, device in enumerate(instance.disks):
8483       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8484       # MAX_WIPE_CHUNK at max
8485       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8486                             constants.MIN_WIPE_CHUNK_PERCENT)
8487       # we _must_ make this an int, otherwise rounding errors will
8488       # occur
8489       wipe_chunk_size = int(wipe_chunk_size)
8490
8491       lu.LogInfo("* Wiping disk %d", idx)
8492       logging.info("Wiping disk %d for instance %s, node %s using"
8493                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8494
8495       offset = 0
8496       size = device.size
8497       last_output = 0
8498       start_time = time.time()
8499
8500       while offset < size:
8501         wipe_size = min(wipe_chunk_size, size - offset)
8502         logging.debug("Wiping disk %d, offset %s, chunk %s",
8503                       idx, offset, wipe_size)
8504         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8505         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8506                      (idx, offset, wipe_size))
8507         now = time.time()
8508         offset += wipe_size
8509         if now - last_output >= 60:
8510           eta = _CalcEta(now - start_time, offset, size)
8511           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8512                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8513           last_output = now
8514   finally:
8515     logging.info("Resume sync of instance %s disks", instance.name)
8516
8517     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8518
8519     for idx, success in enumerate(result.payload):
8520       if not success:
8521         lu.LogWarning("Resume sync of disk %d failed, please have a"
8522                       " look at the status and troubleshoot the issue", idx)
8523         logging.warn("resume-sync of instance %s for disks %d failed",
8524                      instance.name, idx)
8525
8526
8527 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8528   """Create all disks for an instance.
8529
8530   This abstracts away some work from AddInstance.
8531
8532   @type lu: L{LogicalUnit}
8533   @param lu: the logical unit on whose behalf we execute
8534   @type instance: L{objects.Instance}
8535   @param instance: the instance whose disks we should create
8536   @type to_skip: list
8537   @param to_skip: list of indices to skip
8538   @type target_node: string
8539   @param target_node: if passed, overrides the target node for creation
8540   @rtype: boolean
8541   @return: the success of the creation
8542
8543   """
8544   info = _GetInstanceInfoText(instance)
8545   if target_node is None:
8546     pnode = instance.primary_node
8547     all_nodes = instance.all_nodes
8548   else:
8549     pnode = target_node
8550     all_nodes = [pnode]
8551
8552   if instance.disk_template in constants.DTS_FILEBASED:
8553     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8554     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8555
8556     result.Raise("Failed to create directory '%s' on"
8557                  " node %s" % (file_storage_dir, pnode))
8558
8559   # Note: this needs to be kept in sync with adding of disks in
8560   # LUInstanceSetParams
8561   for idx, device in enumerate(instance.disks):
8562     if to_skip and idx in to_skip:
8563       continue
8564     logging.info("Creating volume %s for instance %s",
8565                  device.iv_name, instance.name)
8566     #HARDCODE
8567     for node in all_nodes:
8568       f_create = node == pnode
8569       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8570
8571
8572 def _RemoveDisks(lu, instance, target_node=None):
8573   """Remove all disks for an instance.
8574
8575   This abstracts away some work from `AddInstance()` and
8576   `RemoveInstance()`. Note that in case some of the devices couldn't
8577   be removed, the removal will continue with the other ones (compare
8578   with `_CreateDisks()`).
8579
8580   @type lu: L{LogicalUnit}
8581   @param lu: the logical unit on whose behalf we execute
8582   @type instance: L{objects.Instance}
8583   @param instance: the instance whose disks we should remove
8584   @type target_node: string
8585   @param target_node: used to override the node on which to remove the disks
8586   @rtype: boolean
8587   @return: the success of the removal
8588
8589   """
8590   logging.info("Removing block devices for instance %s", instance.name)
8591
8592   all_result = True
8593   for device in instance.disks:
8594     if target_node:
8595       edata = [(target_node, device)]
8596     else:
8597       edata = device.ComputeNodeTree(instance.primary_node)
8598     for node, disk in edata:
8599       lu.cfg.SetDiskID(disk, node)
8600       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8601       if msg:
8602         lu.LogWarning("Could not remove block device %s on node %s,"
8603                       " continuing anyway: %s", device.iv_name, node, msg)
8604         all_result = False
8605
8606     # if this is a DRBD disk, return its port to the pool
8607     if device.dev_type in constants.LDS_DRBD:
8608       tcp_port = device.logical_id[2]
8609       lu.cfg.AddTcpUdpPort(tcp_port)
8610
8611   if instance.disk_template == constants.DT_FILE:
8612     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8613     if target_node:
8614       tgt = target_node
8615     else:
8616       tgt = instance.primary_node
8617     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8618     if result.fail_msg:
8619       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8620                     file_storage_dir, instance.primary_node, result.fail_msg)
8621       all_result = False
8622
8623   return all_result
8624
8625
8626 def _ComputeDiskSizePerVG(disk_template, disks):
8627   """Compute disk size requirements in the volume group
8628
8629   """
8630   def _compute(disks, payload):
8631     """Universal algorithm.
8632
8633     """
8634     vgs = {}
8635     for disk in disks:
8636       vgs[disk[constants.IDISK_VG]] = \
8637         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8638
8639     return vgs
8640
8641   # Required free disk space as a function of disk and swap space
8642   req_size_dict = {
8643     constants.DT_DISKLESS: {},
8644     constants.DT_PLAIN: _compute(disks, 0),
8645     # 128 MB are added for drbd metadata for each disk
8646     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8647     constants.DT_FILE: {},
8648     constants.DT_SHARED_FILE: {},
8649   }
8650
8651   if disk_template not in req_size_dict:
8652     raise errors.ProgrammerError("Disk template '%s' size requirement"
8653                                  " is unknown" % disk_template)
8654
8655   return req_size_dict[disk_template]
8656
8657
8658 def _ComputeDiskSize(disk_template, disks):
8659   """Compute disk size requirements in the volume group
8660
8661   """
8662   # Required free disk space as a function of disk and swap space
8663   req_size_dict = {
8664     constants.DT_DISKLESS: None,
8665     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8666     # 128 MB are added for drbd metadata for each disk
8667     constants.DT_DRBD8:
8668       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8669     constants.DT_FILE: None,
8670     constants.DT_SHARED_FILE: 0,
8671     constants.DT_BLOCK: 0,
8672   }
8673
8674   if disk_template not in req_size_dict:
8675     raise errors.ProgrammerError("Disk template '%s' size requirement"
8676                                  " is unknown" % disk_template)
8677
8678   return req_size_dict[disk_template]
8679
8680
8681 def _FilterVmNodes(lu, nodenames):
8682   """Filters out non-vm_capable nodes from a list.
8683
8684   @type lu: L{LogicalUnit}
8685   @param lu: the logical unit for which we check
8686   @type nodenames: list
8687   @param nodenames: the list of nodes on which we should check
8688   @rtype: list
8689   @return: the list of vm-capable nodes
8690
8691   """
8692   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8693   return [name for name in nodenames if name not in vm_nodes]
8694
8695
8696 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8697   """Hypervisor parameter validation.
8698
8699   This function abstract the hypervisor parameter validation to be
8700   used in both instance create and instance modify.
8701
8702   @type lu: L{LogicalUnit}
8703   @param lu: the logical unit for which we check
8704   @type nodenames: list
8705   @param nodenames: the list of nodes on which we should check
8706   @type hvname: string
8707   @param hvname: the name of the hypervisor we should use
8708   @type hvparams: dict
8709   @param hvparams: the parameters which we need to check
8710   @raise errors.OpPrereqError: if the parameters are not valid
8711
8712   """
8713   nodenames = _FilterVmNodes(lu, nodenames)
8714
8715   cluster = lu.cfg.GetClusterInfo()
8716   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8717
8718   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8719   for node in nodenames:
8720     info = hvinfo[node]
8721     if info.offline:
8722       continue
8723     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8724
8725
8726 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8727   """OS parameters validation.
8728
8729   @type lu: L{LogicalUnit}
8730   @param lu: the logical unit for which we check
8731   @type required: boolean
8732   @param required: whether the validation should fail if the OS is not
8733       found
8734   @type nodenames: list
8735   @param nodenames: the list of nodes on which we should check
8736   @type osname: string
8737   @param osname: the name of the hypervisor we should use
8738   @type osparams: dict
8739   @param osparams: the parameters which we need to check
8740   @raise errors.OpPrereqError: if the parameters are not valid
8741
8742   """
8743   nodenames = _FilterVmNodes(lu, nodenames)
8744   result = lu.rpc.call_os_validate(nodenames, required, osname,
8745                                    [constants.OS_VALIDATE_PARAMETERS],
8746                                    osparams)
8747   for node, nres in result.items():
8748     # we don't check for offline cases since this should be run only
8749     # against the master node and/or an instance's nodes
8750     nres.Raise("OS Parameters validation failed on node %s" % node)
8751     if not nres.payload:
8752       lu.LogInfo("OS %s not found on node %s, validation skipped",
8753                  osname, node)
8754
8755
8756 class LUInstanceCreate(LogicalUnit):
8757   """Create an instance.
8758
8759   """
8760   HPATH = "instance-add"
8761   HTYPE = constants.HTYPE_INSTANCE
8762   REQ_BGL = False
8763
8764   def CheckArguments(self):
8765     """Check arguments.
8766
8767     """
8768     # do not require name_check to ease forward/backward compatibility
8769     # for tools
8770     if self.op.no_install and self.op.start:
8771       self.LogInfo("No-installation mode selected, disabling startup")
8772       self.op.start = False
8773     # validate/normalize the instance name
8774     self.op.instance_name = \
8775       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8776
8777     if self.op.ip_check and not self.op.name_check:
8778       # TODO: make the ip check more flexible and not depend on the name check
8779       raise errors.OpPrereqError("Cannot do IP address check without a name"
8780                                  " check", errors.ECODE_INVAL)
8781
8782     # check nics' parameter names
8783     for nic in self.op.nics:
8784       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8785
8786     # check disks. parameter names and consistent adopt/no-adopt strategy
8787     has_adopt = has_no_adopt = False
8788     for disk in self.op.disks:
8789       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8790       if constants.IDISK_ADOPT in disk:
8791         has_adopt = True
8792       else:
8793         has_no_adopt = True
8794     if has_adopt and has_no_adopt:
8795       raise errors.OpPrereqError("Either all disks are adopted or none is",
8796                                  errors.ECODE_INVAL)
8797     if has_adopt:
8798       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8799         raise errors.OpPrereqError("Disk adoption is not supported for the"
8800                                    " '%s' disk template" %
8801                                    self.op.disk_template,
8802                                    errors.ECODE_INVAL)
8803       if self.op.iallocator is not None:
8804         raise errors.OpPrereqError("Disk adoption not allowed with an"
8805                                    " iallocator script", errors.ECODE_INVAL)
8806       if self.op.mode == constants.INSTANCE_IMPORT:
8807         raise errors.OpPrereqError("Disk adoption not allowed for"
8808                                    " instance import", errors.ECODE_INVAL)
8809     else:
8810       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8811         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8812                                    " but no 'adopt' parameter given" %
8813                                    self.op.disk_template,
8814                                    errors.ECODE_INVAL)
8815
8816     self.adopt_disks = has_adopt
8817
8818     # instance name verification
8819     if self.op.name_check:
8820       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8821       self.op.instance_name = self.hostname1.name
8822       # used in CheckPrereq for ip ping check
8823       self.check_ip = self.hostname1.ip
8824     else:
8825       self.check_ip = None
8826
8827     # file storage checks
8828     if (self.op.file_driver and
8829         not self.op.file_driver in constants.FILE_DRIVER):
8830       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8831                                  self.op.file_driver, errors.ECODE_INVAL)
8832
8833     if self.op.disk_template == constants.DT_FILE:
8834       opcodes.RequireFileStorage()
8835     elif self.op.disk_template == constants.DT_SHARED_FILE:
8836       opcodes.RequireSharedFileStorage()
8837
8838     ### Node/iallocator related checks
8839     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8840
8841     if self.op.pnode is not None:
8842       if self.op.disk_template in constants.DTS_INT_MIRROR:
8843         if self.op.snode is None:
8844           raise errors.OpPrereqError("The networked disk templates need"
8845                                      " a mirror node", errors.ECODE_INVAL)
8846       elif self.op.snode:
8847         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8848                         " template")
8849         self.op.snode = None
8850
8851     self._cds = _GetClusterDomainSecret()
8852
8853     if self.op.mode == constants.INSTANCE_IMPORT:
8854       # On import force_variant must be True, because if we forced it at
8855       # initial install, our only chance when importing it back is that it
8856       # works again!
8857       self.op.force_variant = True
8858
8859       if self.op.no_install:
8860         self.LogInfo("No-installation mode has no effect during import")
8861
8862     elif self.op.mode == constants.INSTANCE_CREATE:
8863       if self.op.os_type is None:
8864         raise errors.OpPrereqError("No guest OS specified",
8865                                    errors.ECODE_INVAL)
8866       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8867         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8868                                    " installation" % self.op.os_type,
8869                                    errors.ECODE_STATE)
8870       if self.op.disk_template is None:
8871         raise errors.OpPrereqError("No disk template specified",
8872                                    errors.ECODE_INVAL)
8873
8874     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8875       # Check handshake to ensure both clusters have the same domain secret
8876       src_handshake = self.op.source_handshake
8877       if not src_handshake:
8878         raise errors.OpPrereqError("Missing source handshake",
8879                                    errors.ECODE_INVAL)
8880
8881       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8882                                                            src_handshake)
8883       if errmsg:
8884         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8885                                    errors.ECODE_INVAL)
8886
8887       # Load and check source CA
8888       self.source_x509_ca_pem = self.op.source_x509_ca
8889       if not self.source_x509_ca_pem:
8890         raise errors.OpPrereqError("Missing source X509 CA",
8891                                    errors.ECODE_INVAL)
8892
8893       try:
8894         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8895                                                     self._cds)
8896       except OpenSSL.crypto.Error, err:
8897         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8898                                    (err, ), errors.ECODE_INVAL)
8899
8900       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8901       if errcode is not None:
8902         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8903                                    errors.ECODE_INVAL)
8904
8905       self.source_x509_ca = cert
8906
8907       src_instance_name = self.op.source_instance_name
8908       if not src_instance_name:
8909         raise errors.OpPrereqError("Missing source instance name",
8910                                    errors.ECODE_INVAL)
8911
8912       self.source_instance_name = \
8913           netutils.GetHostname(name=src_instance_name).name
8914
8915     else:
8916       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8917                                  self.op.mode, errors.ECODE_INVAL)
8918
8919   def ExpandNames(self):
8920     """ExpandNames for CreateInstance.
8921
8922     Figure out the right locks for instance creation.
8923
8924     """
8925     self.needed_locks = {}
8926
8927     instance_name = self.op.instance_name
8928     # this is just a preventive check, but someone might still add this
8929     # instance in the meantime, and creation will fail at lock-add time
8930     if instance_name in self.cfg.GetInstanceList():
8931       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8932                                  instance_name, errors.ECODE_EXISTS)
8933
8934     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8935
8936     if self.op.iallocator:
8937       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8938       # specifying a group on instance creation and then selecting nodes from
8939       # that group
8940       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8941       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8942     else:
8943       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8944       nodelist = [self.op.pnode]
8945       if self.op.snode is not None:
8946         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8947         nodelist.append(self.op.snode)
8948       self.needed_locks[locking.LEVEL_NODE] = nodelist
8949       # Lock resources of instance's primary and secondary nodes (copy to
8950       # prevent accidential modification)
8951       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8952
8953     # in case of import lock the source node too
8954     if self.op.mode == constants.INSTANCE_IMPORT:
8955       src_node = self.op.src_node
8956       src_path = self.op.src_path
8957
8958       if src_path is None:
8959         self.op.src_path = src_path = self.op.instance_name
8960
8961       if src_node is None:
8962         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8963         self.op.src_node = None
8964         if os.path.isabs(src_path):
8965           raise errors.OpPrereqError("Importing an instance from a path"
8966                                      " requires a source node option",
8967                                      errors.ECODE_INVAL)
8968       else:
8969         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8970         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8971           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8972         if not os.path.isabs(src_path):
8973           self.op.src_path = src_path = \
8974             utils.PathJoin(constants.EXPORT_DIR, src_path)
8975
8976   def _RunAllocator(self):
8977     """Run the allocator based on input opcode.
8978
8979     """
8980     nics = [n.ToDict() for n in self.nics]
8981     ial = IAllocator(self.cfg, self.rpc,
8982                      mode=constants.IALLOCATOR_MODE_ALLOC,
8983                      name=self.op.instance_name,
8984                      disk_template=self.op.disk_template,
8985                      tags=self.op.tags,
8986                      os=self.op.os_type,
8987                      vcpus=self.be_full[constants.BE_VCPUS],
8988                      memory=self.be_full[constants.BE_MAXMEM],
8989                      disks=self.disks,
8990                      nics=nics,
8991                      hypervisor=self.op.hypervisor,
8992                      )
8993
8994     ial.Run(self.op.iallocator)
8995
8996     if not ial.success:
8997       raise errors.OpPrereqError("Can't compute nodes using"
8998                                  " iallocator '%s': %s" %
8999                                  (self.op.iallocator, ial.info),
9000                                  errors.ECODE_NORES)
9001     if len(ial.result) != ial.required_nodes:
9002       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9003                                  " of nodes (%s), required %s" %
9004                                  (self.op.iallocator, len(ial.result),
9005                                   ial.required_nodes), errors.ECODE_FAULT)
9006     self.op.pnode = ial.result[0]
9007     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9008                  self.op.instance_name, self.op.iallocator,
9009                  utils.CommaJoin(ial.result))
9010     if ial.required_nodes == 2:
9011       self.op.snode = ial.result[1]
9012
9013   def BuildHooksEnv(self):
9014     """Build hooks env.
9015
9016     This runs on master, primary and secondary nodes of the instance.
9017
9018     """
9019     env = {
9020       "ADD_MODE": self.op.mode,
9021       }
9022     if self.op.mode == constants.INSTANCE_IMPORT:
9023       env["SRC_NODE"] = self.op.src_node
9024       env["SRC_PATH"] = self.op.src_path
9025       env["SRC_IMAGES"] = self.src_images
9026
9027     env.update(_BuildInstanceHookEnv(
9028       name=self.op.instance_name,
9029       primary_node=self.op.pnode,
9030       secondary_nodes=self.secondaries,
9031       status=self.op.start,
9032       os_type=self.op.os_type,
9033       minmem=self.be_full[constants.BE_MINMEM],
9034       maxmem=self.be_full[constants.BE_MAXMEM],
9035       vcpus=self.be_full[constants.BE_VCPUS],
9036       nics=_NICListToTuple(self, self.nics),
9037       disk_template=self.op.disk_template,
9038       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9039              for d in self.disks],
9040       bep=self.be_full,
9041       hvp=self.hv_full,
9042       hypervisor_name=self.op.hypervisor,
9043       tags=self.op.tags,
9044     ))
9045
9046     return env
9047
9048   def BuildHooksNodes(self):
9049     """Build hooks nodes.
9050
9051     """
9052     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9053     return nl, nl
9054
9055   def _ReadExportInfo(self):
9056     """Reads the export information from disk.
9057
9058     It will override the opcode source node and path with the actual
9059     information, if these two were not specified before.
9060
9061     @return: the export information
9062
9063     """
9064     assert self.op.mode == constants.INSTANCE_IMPORT
9065
9066     src_node = self.op.src_node
9067     src_path = self.op.src_path
9068
9069     if src_node is None:
9070       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9071       exp_list = self.rpc.call_export_list(locked_nodes)
9072       found = False
9073       for node in exp_list:
9074         if exp_list[node].fail_msg:
9075           continue
9076         if src_path in exp_list[node].payload:
9077           found = True
9078           self.op.src_node = src_node = node
9079           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9080                                                        src_path)
9081           break
9082       if not found:
9083         raise errors.OpPrereqError("No export found for relative path %s" %
9084                                     src_path, errors.ECODE_INVAL)
9085
9086     _CheckNodeOnline(self, src_node)
9087     result = self.rpc.call_export_info(src_node, src_path)
9088     result.Raise("No export or invalid export found in dir %s" % src_path)
9089
9090     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9091     if not export_info.has_section(constants.INISECT_EXP):
9092       raise errors.ProgrammerError("Corrupted export config",
9093                                    errors.ECODE_ENVIRON)
9094
9095     ei_version = export_info.get(constants.INISECT_EXP, "version")
9096     if (int(ei_version) != constants.EXPORT_VERSION):
9097       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9098                                  (ei_version, constants.EXPORT_VERSION),
9099                                  errors.ECODE_ENVIRON)
9100     return export_info
9101
9102   def _ReadExportParams(self, einfo):
9103     """Use export parameters as defaults.
9104
9105     In case the opcode doesn't specify (as in override) some instance
9106     parameters, then try to use them from the export information, if
9107     that declares them.
9108
9109     """
9110     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9111
9112     if self.op.disk_template is None:
9113       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9114         self.op.disk_template = einfo.get(constants.INISECT_INS,
9115                                           "disk_template")
9116         if self.op.disk_template not in constants.DISK_TEMPLATES:
9117           raise errors.OpPrereqError("Disk template specified in configuration"
9118                                      " file is not one of the allowed values:"
9119                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9120       else:
9121         raise errors.OpPrereqError("No disk template specified and the export"
9122                                    " is missing the disk_template information",
9123                                    errors.ECODE_INVAL)
9124
9125     if not self.op.disks:
9126       disks = []
9127       # TODO: import the disk iv_name too
9128       for idx in range(constants.MAX_DISKS):
9129         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9130           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9131           disks.append({constants.IDISK_SIZE: disk_sz})
9132       self.op.disks = disks
9133       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9134         raise errors.OpPrereqError("No disk info specified and the export"
9135                                    " is missing the disk information",
9136                                    errors.ECODE_INVAL)
9137
9138     if not self.op.nics:
9139       nics = []
9140       for idx in range(constants.MAX_NICS):
9141         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9142           ndict = {}
9143           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9144             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9145             ndict[name] = v
9146           nics.append(ndict)
9147         else:
9148           break
9149       self.op.nics = nics
9150
9151     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9152       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9153
9154     if (self.op.hypervisor is None and
9155         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9156       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9157
9158     if einfo.has_section(constants.INISECT_HYP):
9159       # use the export parameters but do not override the ones
9160       # specified by the user
9161       for name, value in einfo.items(constants.INISECT_HYP):
9162         if name not in self.op.hvparams:
9163           self.op.hvparams[name] = value
9164
9165     if einfo.has_section(constants.INISECT_BEP):
9166       # use the parameters, without overriding
9167       for name, value in einfo.items(constants.INISECT_BEP):
9168         if name not in self.op.beparams:
9169           self.op.beparams[name] = value
9170         # Compatibility for the old "memory" be param
9171         if name == constants.BE_MEMORY:
9172           if constants.BE_MAXMEM not in self.op.beparams:
9173             self.op.beparams[constants.BE_MAXMEM] = value
9174           if constants.BE_MINMEM not in self.op.beparams:
9175             self.op.beparams[constants.BE_MINMEM] = value
9176     else:
9177       # try to read the parameters old style, from the main section
9178       for name in constants.BES_PARAMETERS:
9179         if (name not in self.op.beparams and
9180             einfo.has_option(constants.INISECT_INS, name)):
9181           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9182
9183     if einfo.has_section(constants.INISECT_OSP):
9184       # use the parameters, without overriding
9185       for name, value in einfo.items(constants.INISECT_OSP):
9186         if name not in self.op.osparams:
9187           self.op.osparams[name] = value
9188
9189   def _RevertToDefaults(self, cluster):
9190     """Revert the instance parameters to the default values.
9191
9192     """
9193     # hvparams
9194     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9195     for name in self.op.hvparams.keys():
9196       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9197         del self.op.hvparams[name]
9198     # beparams
9199     be_defs = cluster.SimpleFillBE({})
9200     for name in self.op.beparams.keys():
9201       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9202         del self.op.beparams[name]
9203     # nic params
9204     nic_defs = cluster.SimpleFillNIC({})
9205     for nic in self.op.nics:
9206       for name in constants.NICS_PARAMETERS:
9207         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9208           del nic[name]
9209     # osparams
9210     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9211     for name in self.op.osparams.keys():
9212       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9213         del self.op.osparams[name]
9214
9215   def _CalculateFileStorageDir(self):
9216     """Calculate final instance file storage dir.
9217
9218     """
9219     # file storage dir calculation/check
9220     self.instance_file_storage_dir = None
9221     if self.op.disk_template in constants.DTS_FILEBASED:
9222       # build the full file storage dir path
9223       joinargs = []
9224
9225       if self.op.disk_template == constants.DT_SHARED_FILE:
9226         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9227       else:
9228         get_fsd_fn = self.cfg.GetFileStorageDir
9229
9230       cfg_storagedir = get_fsd_fn()
9231       if not cfg_storagedir:
9232         raise errors.OpPrereqError("Cluster file storage dir not defined")
9233       joinargs.append(cfg_storagedir)
9234
9235       if self.op.file_storage_dir is not None:
9236         joinargs.append(self.op.file_storage_dir)
9237
9238       joinargs.append(self.op.instance_name)
9239
9240       # pylint: disable=W0142
9241       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9242
9243   def CheckPrereq(self):
9244     """Check prerequisites.
9245
9246     """
9247     self._CalculateFileStorageDir()
9248
9249     if self.op.mode == constants.INSTANCE_IMPORT:
9250       export_info = self._ReadExportInfo()
9251       self._ReadExportParams(export_info)
9252
9253     if (not self.cfg.GetVGName() and
9254         self.op.disk_template not in constants.DTS_NOT_LVM):
9255       raise errors.OpPrereqError("Cluster does not support lvm-based"
9256                                  " instances", errors.ECODE_STATE)
9257
9258     if (self.op.hypervisor is None or
9259         self.op.hypervisor == constants.VALUE_AUTO):
9260       self.op.hypervisor = self.cfg.GetHypervisorType()
9261
9262     cluster = self.cfg.GetClusterInfo()
9263     enabled_hvs = cluster.enabled_hypervisors
9264     if self.op.hypervisor not in enabled_hvs:
9265       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9266                                  " cluster (%s)" % (self.op.hypervisor,
9267                                   ",".join(enabled_hvs)),
9268                                  errors.ECODE_STATE)
9269
9270     # Check tag validity
9271     for tag in self.op.tags:
9272       objects.TaggableObject.ValidateTag(tag)
9273
9274     # check hypervisor parameter syntax (locally)
9275     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9276     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9277                                       self.op.hvparams)
9278     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9279     hv_type.CheckParameterSyntax(filled_hvp)
9280     self.hv_full = filled_hvp
9281     # check that we don't specify global parameters on an instance
9282     _CheckGlobalHvParams(self.op.hvparams)
9283
9284     # fill and remember the beparams dict
9285     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9286     for param, value in self.op.beparams.iteritems():
9287       if value == constants.VALUE_AUTO:
9288         self.op.beparams[param] = default_beparams[param]
9289     objects.UpgradeBeParams(self.op.beparams)
9290     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9291     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9292
9293     # build os parameters
9294     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9295
9296     # now that hvp/bep are in final format, let's reset to defaults,
9297     # if told to do so
9298     if self.op.identify_defaults:
9299       self._RevertToDefaults(cluster)
9300
9301     # NIC buildup
9302     self.nics = []
9303     for idx, nic in enumerate(self.op.nics):
9304       nic_mode_req = nic.get(constants.INIC_MODE, None)
9305       nic_mode = nic_mode_req
9306       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9307         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9308
9309       # in routed mode, for the first nic, the default ip is 'auto'
9310       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9311         default_ip_mode = constants.VALUE_AUTO
9312       else:
9313         default_ip_mode = constants.VALUE_NONE
9314
9315       # ip validity checks
9316       ip = nic.get(constants.INIC_IP, default_ip_mode)
9317       if ip is None or ip.lower() == constants.VALUE_NONE:
9318         nic_ip = None
9319       elif ip.lower() == constants.VALUE_AUTO:
9320         if not self.op.name_check:
9321           raise errors.OpPrereqError("IP address set to auto but name checks"
9322                                      " have been skipped",
9323                                      errors.ECODE_INVAL)
9324         nic_ip = self.hostname1.ip
9325       else:
9326         if not netutils.IPAddress.IsValid(ip):
9327           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9328                                      errors.ECODE_INVAL)
9329         nic_ip = ip
9330
9331       # TODO: check the ip address for uniqueness
9332       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9333         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9334                                    errors.ECODE_INVAL)
9335
9336       # MAC address verification
9337       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9338       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9339         mac = utils.NormalizeAndValidateMac(mac)
9340
9341         try:
9342           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9343         except errors.ReservationError:
9344           raise errors.OpPrereqError("MAC address %s already in use"
9345                                      " in cluster" % mac,
9346                                      errors.ECODE_NOTUNIQUE)
9347
9348       #  Build nic parameters
9349       link = nic.get(constants.INIC_LINK, None)
9350       if link == constants.VALUE_AUTO:
9351         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9352       nicparams = {}
9353       if nic_mode_req:
9354         nicparams[constants.NIC_MODE] = nic_mode
9355       if link:
9356         nicparams[constants.NIC_LINK] = link
9357
9358       check_params = cluster.SimpleFillNIC(nicparams)
9359       objects.NIC.CheckParameterSyntax(check_params)
9360       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9361
9362     # disk checks/pre-build
9363     default_vg = self.cfg.GetVGName()
9364     self.disks = []
9365     for disk in self.op.disks:
9366       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9367       if mode not in constants.DISK_ACCESS_SET:
9368         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9369                                    mode, errors.ECODE_INVAL)
9370       size = disk.get(constants.IDISK_SIZE, None)
9371       if size is None:
9372         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9373       try:
9374         size = int(size)
9375       except (TypeError, ValueError):
9376         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9377                                    errors.ECODE_INVAL)
9378
9379       data_vg = disk.get(constants.IDISK_VG, default_vg)
9380       new_disk = {
9381         constants.IDISK_SIZE: size,
9382         constants.IDISK_MODE: mode,
9383         constants.IDISK_VG: data_vg,
9384         }
9385       if constants.IDISK_METAVG in disk:
9386         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9387       if constants.IDISK_ADOPT in disk:
9388         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9389       self.disks.append(new_disk)
9390
9391     if self.op.mode == constants.INSTANCE_IMPORT:
9392       disk_images = []
9393       for idx in range(len(self.disks)):
9394         option = "disk%d_dump" % idx
9395         if export_info.has_option(constants.INISECT_INS, option):
9396           # FIXME: are the old os-es, disk sizes, etc. useful?
9397           export_name = export_info.get(constants.INISECT_INS, option)
9398           image = utils.PathJoin(self.op.src_path, export_name)
9399           disk_images.append(image)
9400         else:
9401           disk_images.append(False)
9402
9403       self.src_images = disk_images
9404
9405       old_name = export_info.get(constants.INISECT_INS, "name")
9406       if self.op.instance_name == old_name:
9407         for idx, nic in enumerate(self.nics):
9408           if nic.mac == constants.VALUE_AUTO:
9409             nic_mac_ini = "nic%d_mac" % idx
9410             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9411
9412     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9413
9414     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9415     if self.op.ip_check:
9416       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9417         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9418                                    (self.check_ip, self.op.instance_name),
9419                                    errors.ECODE_NOTUNIQUE)
9420
9421     #### mac address generation
9422     # By generating here the mac address both the allocator and the hooks get
9423     # the real final mac address rather than the 'auto' or 'generate' value.
9424     # There is a race condition between the generation and the instance object
9425     # creation, which means that we know the mac is valid now, but we're not
9426     # sure it will be when we actually add the instance. If things go bad
9427     # adding the instance will abort because of a duplicate mac, and the
9428     # creation job will fail.
9429     for nic in self.nics:
9430       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9431         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9432
9433     #### allocator run
9434
9435     if self.op.iallocator is not None:
9436       self._RunAllocator()
9437
9438     # Release all unneeded node locks
9439     _ReleaseLocks(self, locking.LEVEL_NODE,
9440                   keep=filter(None, [self.op.pnode, self.op.snode,
9441                                      self.op.src_node]))
9442
9443     #### node related checks
9444
9445     # check primary node
9446     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9447     assert self.pnode is not None, \
9448       "Cannot retrieve locked node %s" % self.op.pnode
9449     if pnode.offline:
9450       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9451                                  pnode.name, errors.ECODE_STATE)
9452     if pnode.drained:
9453       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9454                                  pnode.name, errors.ECODE_STATE)
9455     if not pnode.vm_capable:
9456       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9457                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9458
9459     self.secondaries = []
9460
9461     # mirror node verification
9462     if self.op.disk_template in constants.DTS_INT_MIRROR:
9463       if self.op.snode == pnode.name:
9464         raise errors.OpPrereqError("The secondary node cannot be the"
9465                                    " primary node", errors.ECODE_INVAL)
9466       _CheckNodeOnline(self, self.op.snode)
9467       _CheckNodeNotDrained(self, self.op.snode)
9468       _CheckNodeVmCapable(self, self.op.snode)
9469       self.secondaries.append(self.op.snode)
9470
9471       snode = self.cfg.GetNodeInfo(self.op.snode)
9472       if pnode.group != snode.group:
9473         self.LogWarning("The primary and secondary nodes are in two"
9474                         " different node groups; the disk parameters"
9475                         " from the first disk's node group will be"
9476                         " used")
9477
9478     nodenames = [pnode.name] + self.secondaries
9479
9480     # disk parameters (not customizable at instance or node level)
9481     # just use the primary node parameters, ignoring the secondary.
9482     self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9483
9484     if not self.adopt_disks:
9485       # Check lv size requirements, if not adopting
9486       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9487       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9488
9489     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9490       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9491                                 disk[constants.IDISK_ADOPT])
9492                      for disk in self.disks])
9493       if len(all_lvs) != len(self.disks):
9494         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9495                                    errors.ECODE_INVAL)
9496       for lv_name in all_lvs:
9497         try:
9498           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9499           # to ReserveLV uses the same syntax
9500           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9501         except errors.ReservationError:
9502           raise errors.OpPrereqError("LV named %s used by another instance" %
9503                                      lv_name, errors.ECODE_NOTUNIQUE)
9504
9505       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9506       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9507
9508       node_lvs = self.rpc.call_lv_list([pnode.name],
9509                                        vg_names.payload.keys())[pnode.name]
9510       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9511       node_lvs = node_lvs.payload
9512
9513       delta = all_lvs.difference(node_lvs.keys())
9514       if delta:
9515         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9516                                    utils.CommaJoin(delta),
9517                                    errors.ECODE_INVAL)
9518       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9519       if online_lvs:
9520         raise errors.OpPrereqError("Online logical volumes found, cannot"
9521                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9522                                    errors.ECODE_STATE)
9523       # update the size of disk based on what is found
9524       for dsk in self.disks:
9525         dsk[constants.IDISK_SIZE] = \
9526           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9527                                         dsk[constants.IDISK_ADOPT])][0]))
9528
9529     elif self.op.disk_template == constants.DT_BLOCK:
9530       # Normalize and de-duplicate device paths
9531       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9532                        for disk in self.disks])
9533       if len(all_disks) != len(self.disks):
9534         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9535                                    errors.ECODE_INVAL)
9536       baddisks = [d for d in all_disks
9537                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9538       if baddisks:
9539         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9540                                    " cannot be adopted" %
9541                                    (", ".join(baddisks),
9542                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9543                                    errors.ECODE_INVAL)
9544
9545       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9546                                             list(all_disks))[pnode.name]
9547       node_disks.Raise("Cannot get block device information from node %s" %
9548                        pnode.name)
9549       node_disks = node_disks.payload
9550       delta = all_disks.difference(node_disks.keys())
9551       if delta:
9552         raise errors.OpPrereqError("Missing block device(s): %s" %
9553                                    utils.CommaJoin(delta),
9554                                    errors.ECODE_INVAL)
9555       for dsk in self.disks:
9556         dsk[constants.IDISK_SIZE] = \
9557           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9558
9559     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9560
9561     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9562     # check OS parameters (remotely)
9563     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9564
9565     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9566
9567     # memory check on primary node
9568     #TODO(dynmem): use MINMEM for checking
9569     if self.op.start:
9570       _CheckNodeFreeMemory(self, self.pnode.name,
9571                            "creating instance %s" % self.op.instance_name,
9572                            self.be_full[constants.BE_MAXMEM],
9573                            self.op.hypervisor)
9574
9575     self.dry_run_result = list(nodenames)
9576
9577   def Exec(self, feedback_fn):
9578     """Create and add the instance to the cluster.
9579
9580     """
9581     instance = self.op.instance_name
9582     pnode_name = self.pnode.name
9583
9584     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9585                 self.owned_locks(locking.LEVEL_NODE)), \
9586       "Node locks differ from node resource locks"
9587
9588     ht_kind = self.op.hypervisor
9589     if ht_kind in constants.HTS_REQ_PORT:
9590       network_port = self.cfg.AllocatePort()
9591     else:
9592       network_port = None
9593
9594     disks = _GenerateDiskTemplate(self,
9595                                   self.op.disk_template,
9596                                   instance, pnode_name,
9597                                   self.secondaries,
9598                                   self.disks,
9599                                   self.instance_file_storage_dir,
9600                                   self.op.file_driver,
9601                                   0,
9602                                   feedback_fn,
9603                                   self.diskparams)
9604
9605     iobj = objects.Instance(name=instance, os=self.op.os_type,
9606                             primary_node=pnode_name,
9607                             nics=self.nics, disks=disks,
9608                             disk_template=self.op.disk_template,
9609                             admin_state=constants.ADMINST_DOWN,
9610                             network_port=network_port,
9611                             beparams=self.op.beparams,
9612                             hvparams=self.op.hvparams,
9613                             hypervisor=self.op.hypervisor,
9614                             osparams=self.op.osparams,
9615                             )
9616
9617     if self.op.tags:
9618       for tag in self.op.tags:
9619         iobj.AddTag(tag)
9620
9621     if self.adopt_disks:
9622       if self.op.disk_template == constants.DT_PLAIN:
9623         # rename LVs to the newly-generated names; we need to construct
9624         # 'fake' LV disks with the old data, plus the new unique_id
9625         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9626         rename_to = []
9627         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9628           rename_to.append(t_dsk.logical_id)
9629           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9630           self.cfg.SetDiskID(t_dsk, pnode_name)
9631         result = self.rpc.call_blockdev_rename(pnode_name,
9632                                                zip(tmp_disks, rename_to))
9633         result.Raise("Failed to rename adoped LVs")
9634     else:
9635       feedback_fn("* creating instance disks...")
9636       try:
9637         _CreateDisks(self, iobj)
9638       except errors.OpExecError:
9639         self.LogWarning("Device creation failed, reverting...")
9640         try:
9641           _RemoveDisks(self, iobj)
9642         finally:
9643           self.cfg.ReleaseDRBDMinors(instance)
9644           raise
9645
9646     feedback_fn("adding instance %s to cluster config" % instance)
9647
9648     self.cfg.AddInstance(iobj, self.proc.GetECId())
9649
9650     # Declare that we don't want to remove the instance lock anymore, as we've
9651     # added the instance to the config
9652     del self.remove_locks[locking.LEVEL_INSTANCE]
9653
9654     if self.op.mode == constants.INSTANCE_IMPORT:
9655       # Release unused nodes
9656       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9657     else:
9658       # Release all nodes
9659       _ReleaseLocks(self, locking.LEVEL_NODE)
9660
9661     disk_abort = False
9662     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9663       feedback_fn("* wiping instance disks...")
9664       try:
9665         _WipeDisks(self, iobj)
9666       except errors.OpExecError, err:
9667         logging.exception("Wiping disks failed")
9668         self.LogWarning("Wiping instance disks failed (%s)", err)
9669         disk_abort = True
9670
9671     if disk_abort:
9672       # Something is already wrong with the disks, don't do anything else
9673       pass
9674     elif self.op.wait_for_sync:
9675       disk_abort = not _WaitForSync(self, iobj)
9676     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9677       # make sure the disks are not degraded (still sync-ing is ok)
9678       feedback_fn("* checking mirrors status")
9679       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9680     else:
9681       disk_abort = False
9682
9683     if disk_abort:
9684       _RemoveDisks(self, iobj)
9685       self.cfg.RemoveInstance(iobj.name)
9686       # Make sure the instance lock gets removed
9687       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9688       raise errors.OpExecError("There are some degraded disks for"
9689                                " this instance")
9690
9691     # Release all node resource locks
9692     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9693
9694     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9695       if self.op.mode == constants.INSTANCE_CREATE:
9696         if not self.op.no_install:
9697           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9698                         not self.op.wait_for_sync)
9699           if pause_sync:
9700             feedback_fn("* pausing disk sync to install instance OS")
9701             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9702                                                               iobj.disks, True)
9703             for idx, success in enumerate(result.payload):
9704               if not success:
9705                 logging.warn("pause-sync of instance %s for disk %d failed",
9706                              instance, idx)
9707
9708           feedback_fn("* running the instance OS create scripts...")
9709           # FIXME: pass debug option from opcode to backend
9710           os_add_result = \
9711             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9712                                           self.op.debug_level)
9713           if pause_sync:
9714             feedback_fn("* resuming disk sync")
9715             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9716                                                               iobj.disks, False)
9717             for idx, success in enumerate(result.payload):
9718               if not success:
9719                 logging.warn("resume-sync of instance %s for disk %d failed",
9720                              instance, idx)
9721
9722           os_add_result.Raise("Could not add os for instance %s"
9723                               " on node %s" % (instance, pnode_name))
9724
9725       elif self.op.mode == constants.INSTANCE_IMPORT:
9726         feedback_fn("* running the instance OS import scripts...")
9727
9728         transfers = []
9729
9730         for idx, image in enumerate(self.src_images):
9731           if not image:
9732             continue
9733
9734           # FIXME: pass debug option from opcode to backend
9735           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9736                                              constants.IEIO_FILE, (image, ),
9737                                              constants.IEIO_SCRIPT,
9738                                              (iobj.disks[idx], idx),
9739                                              None)
9740           transfers.append(dt)
9741
9742         import_result = \
9743           masterd.instance.TransferInstanceData(self, feedback_fn,
9744                                                 self.op.src_node, pnode_name,
9745                                                 self.pnode.secondary_ip,
9746                                                 iobj, transfers)
9747         if not compat.all(import_result):
9748           self.LogWarning("Some disks for instance %s on node %s were not"
9749                           " imported successfully" % (instance, pnode_name))
9750
9751       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9752         feedback_fn("* preparing remote import...")
9753         # The source cluster will stop the instance before attempting to make a
9754         # connection. In some cases stopping an instance can take a long time,
9755         # hence the shutdown timeout is added to the connection timeout.
9756         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9757                            self.op.source_shutdown_timeout)
9758         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9759
9760         assert iobj.primary_node == self.pnode.name
9761         disk_results = \
9762           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9763                                         self.source_x509_ca,
9764                                         self._cds, timeouts)
9765         if not compat.all(disk_results):
9766           # TODO: Should the instance still be started, even if some disks
9767           # failed to import (valid for local imports, too)?
9768           self.LogWarning("Some disks for instance %s on node %s were not"
9769                           " imported successfully" % (instance, pnode_name))
9770
9771         # Run rename script on newly imported instance
9772         assert iobj.name == instance
9773         feedback_fn("Running rename script for %s" % instance)
9774         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9775                                                    self.source_instance_name,
9776                                                    self.op.debug_level)
9777         if result.fail_msg:
9778           self.LogWarning("Failed to run rename script for %s on node"
9779                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9780
9781       else:
9782         # also checked in the prereq part
9783         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9784                                      % self.op.mode)
9785
9786     assert not self.owned_locks(locking.LEVEL_NODE_RES)
9787
9788     if self.op.start:
9789       iobj.admin_state = constants.ADMINST_UP
9790       self.cfg.Update(iobj, feedback_fn)
9791       logging.info("Starting instance %s on node %s", instance, pnode_name)
9792       feedback_fn("* starting instance...")
9793       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9794                                             False)
9795       result.Raise("Could not start instance")
9796
9797     return list(iobj.all_nodes)
9798
9799
9800 class LUInstanceConsole(NoHooksLU):
9801   """Connect to an instance's console.
9802
9803   This is somewhat special in that it returns the command line that
9804   you need to run on the master node in order to connect to the
9805   console.
9806
9807   """
9808   REQ_BGL = False
9809
9810   def ExpandNames(self):
9811     self.share_locks = _ShareAll()
9812     self._ExpandAndLockInstance()
9813
9814   def CheckPrereq(self):
9815     """Check prerequisites.
9816
9817     This checks that the instance is in the cluster.
9818
9819     """
9820     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9821     assert self.instance is not None, \
9822       "Cannot retrieve locked instance %s" % self.op.instance_name
9823     _CheckNodeOnline(self, self.instance.primary_node)
9824
9825   def Exec(self, feedback_fn):
9826     """Connect to the console of an instance
9827
9828     """
9829     instance = self.instance
9830     node = instance.primary_node
9831
9832     node_insts = self.rpc.call_instance_list([node],
9833                                              [instance.hypervisor])[node]
9834     node_insts.Raise("Can't get node information from %s" % node)
9835
9836     if instance.name not in node_insts.payload:
9837       if instance.admin_state == constants.ADMINST_UP:
9838         state = constants.INSTST_ERRORDOWN
9839       elif instance.admin_state == constants.ADMINST_DOWN:
9840         state = constants.INSTST_ADMINDOWN
9841       else:
9842         state = constants.INSTST_ADMINOFFLINE
9843       raise errors.OpExecError("Instance %s is not running (state %s)" %
9844                                (instance.name, state))
9845
9846     logging.debug("Connecting to console of %s on %s", instance.name, node)
9847
9848     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9849
9850
9851 def _GetInstanceConsole(cluster, instance):
9852   """Returns console information for an instance.
9853
9854   @type cluster: L{objects.Cluster}
9855   @type instance: L{objects.Instance}
9856   @rtype: dict
9857
9858   """
9859   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9860   # beparams and hvparams are passed separately, to avoid editing the
9861   # instance and then saving the defaults in the instance itself.
9862   hvparams = cluster.FillHV(instance)
9863   beparams = cluster.FillBE(instance)
9864   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9865
9866   assert console.instance == instance.name
9867   assert console.Validate()
9868
9869   return console.ToDict()
9870
9871
9872 class LUInstanceReplaceDisks(LogicalUnit):
9873   """Replace the disks of an instance.
9874
9875   """
9876   HPATH = "mirrors-replace"
9877   HTYPE = constants.HTYPE_INSTANCE
9878   REQ_BGL = False
9879
9880   def CheckArguments(self):
9881     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9882                                   self.op.iallocator)
9883
9884   def ExpandNames(self):
9885     self._ExpandAndLockInstance()
9886
9887     assert locking.LEVEL_NODE not in self.needed_locks
9888     assert locking.LEVEL_NODE_RES not in self.needed_locks
9889     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9890
9891     assert self.op.iallocator is None or self.op.remote_node is None, \
9892       "Conflicting options"
9893
9894     if self.op.remote_node is not None:
9895       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9896
9897       # Warning: do not remove the locking of the new secondary here
9898       # unless DRBD8.AddChildren is changed to work in parallel;
9899       # currently it doesn't since parallel invocations of
9900       # FindUnusedMinor will conflict
9901       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9902       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9903     else:
9904       self.needed_locks[locking.LEVEL_NODE] = []
9905       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9906
9907       if self.op.iallocator is not None:
9908         # iallocator will select a new node in the same group
9909         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9910
9911     self.needed_locks[locking.LEVEL_NODE_RES] = []
9912
9913     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9914                                    self.op.iallocator, self.op.remote_node,
9915                                    self.op.disks, False, self.op.early_release)
9916
9917     self.tasklets = [self.replacer]
9918
9919   def DeclareLocks(self, level):
9920     if level == locking.LEVEL_NODEGROUP:
9921       assert self.op.remote_node is None
9922       assert self.op.iallocator is not None
9923       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9924
9925       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9926       # Lock all groups used by instance optimistically; this requires going
9927       # via the node before it's locked, requiring verification later on
9928       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9929         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9930
9931     elif level == locking.LEVEL_NODE:
9932       if self.op.iallocator is not None:
9933         assert self.op.remote_node is None
9934         assert not self.needed_locks[locking.LEVEL_NODE]
9935
9936         # Lock member nodes of all locked groups
9937         self.needed_locks[locking.LEVEL_NODE] = [node_name
9938           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9939           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9940       else:
9941         self._LockInstancesNodes()
9942     elif level == locking.LEVEL_NODE_RES:
9943       # Reuse node locks
9944       self.needed_locks[locking.LEVEL_NODE_RES] = \
9945         self.needed_locks[locking.LEVEL_NODE]
9946
9947   def BuildHooksEnv(self):
9948     """Build hooks env.
9949
9950     This runs on the master, the primary and all the secondaries.
9951
9952     """
9953     instance = self.replacer.instance
9954     env = {
9955       "MODE": self.op.mode,
9956       "NEW_SECONDARY": self.op.remote_node,
9957       "OLD_SECONDARY": instance.secondary_nodes[0],
9958       }
9959     env.update(_BuildInstanceHookEnvByObject(self, instance))
9960     return env
9961
9962   def BuildHooksNodes(self):
9963     """Build hooks nodes.
9964
9965     """
9966     instance = self.replacer.instance
9967     nl = [
9968       self.cfg.GetMasterNode(),
9969       instance.primary_node,
9970       ]
9971     if self.op.remote_node is not None:
9972       nl.append(self.op.remote_node)
9973     return nl, nl
9974
9975   def CheckPrereq(self):
9976     """Check prerequisites.
9977
9978     """
9979     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9980             self.op.iallocator is None)
9981
9982     # Verify if node group locks are still correct
9983     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9984     if owned_groups:
9985       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9986
9987     return LogicalUnit.CheckPrereq(self)
9988
9989
9990 class TLReplaceDisks(Tasklet):
9991   """Replaces disks for an instance.
9992
9993   Note: Locking is not within the scope of this class.
9994
9995   """
9996   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9997                disks, delay_iallocator, early_release):
9998     """Initializes this class.
9999
10000     """
10001     Tasklet.__init__(self, lu)
10002
10003     # Parameters
10004     self.instance_name = instance_name
10005     self.mode = mode
10006     self.iallocator_name = iallocator_name
10007     self.remote_node = remote_node
10008     self.disks = disks
10009     self.delay_iallocator = delay_iallocator
10010     self.early_release = early_release
10011
10012     # Runtime data
10013     self.instance = None
10014     self.new_node = None
10015     self.target_node = None
10016     self.other_node = None
10017     self.remote_node_info = None
10018     self.node_secondary_ip = None
10019
10020   @staticmethod
10021   def CheckArguments(mode, remote_node, iallocator):
10022     """Helper function for users of this class.
10023
10024     """
10025     # check for valid parameter combination
10026     if mode == constants.REPLACE_DISK_CHG:
10027       if remote_node is None and iallocator is None:
10028         raise errors.OpPrereqError("When changing the secondary either an"
10029                                    " iallocator script must be used or the"
10030                                    " new node given", errors.ECODE_INVAL)
10031
10032       if remote_node is not None and iallocator is not None:
10033         raise errors.OpPrereqError("Give either the iallocator or the new"
10034                                    " secondary, not both", errors.ECODE_INVAL)
10035
10036     elif remote_node is not None or iallocator is not None:
10037       # Not replacing the secondary
10038       raise errors.OpPrereqError("The iallocator and new node options can"
10039                                  " only be used when changing the"
10040                                  " secondary node", errors.ECODE_INVAL)
10041
10042   @staticmethod
10043   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10044     """Compute a new secondary node using an IAllocator.
10045
10046     """
10047     ial = IAllocator(lu.cfg, lu.rpc,
10048                      mode=constants.IALLOCATOR_MODE_RELOC,
10049                      name=instance_name,
10050                      relocate_from=list(relocate_from))
10051
10052     ial.Run(iallocator_name)
10053
10054     if not ial.success:
10055       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10056                                  " %s" % (iallocator_name, ial.info),
10057                                  errors.ECODE_NORES)
10058
10059     if len(ial.result) != ial.required_nodes:
10060       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10061                                  " of nodes (%s), required %s" %
10062                                  (iallocator_name,
10063                                   len(ial.result), ial.required_nodes),
10064                                  errors.ECODE_FAULT)
10065
10066     remote_node_name = ial.result[0]
10067
10068     lu.LogInfo("Selected new secondary for instance '%s': %s",
10069                instance_name, remote_node_name)
10070
10071     return remote_node_name
10072
10073   def _FindFaultyDisks(self, node_name):
10074     """Wrapper for L{_FindFaultyInstanceDisks}.
10075
10076     """
10077     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10078                                     node_name, True)
10079
10080   def _CheckDisksActivated(self, instance):
10081     """Checks if the instance disks are activated.
10082
10083     @param instance: The instance to check disks
10084     @return: True if they are activated, False otherwise
10085
10086     """
10087     nodes = instance.all_nodes
10088
10089     for idx, dev in enumerate(instance.disks):
10090       for node in nodes:
10091         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10092         self.cfg.SetDiskID(dev, node)
10093
10094         result = self.rpc.call_blockdev_find(node, dev)
10095
10096         if result.offline:
10097           continue
10098         elif result.fail_msg or not result.payload:
10099           return False
10100
10101     return True
10102
10103   def CheckPrereq(self):
10104     """Check prerequisites.
10105
10106     This checks that the instance is in the cluster.
10107
10108     """
10109     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10110     assert instance is not None, \
10111       "Cannot retrieve locked instance %s" % self.instance_name
10112
10113     if instance.disk_template != constants.DT_DRBD8:
10114       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10115                                  " instances", errors.ECODE_INVAL)
10116
10117     if len(instance.secondary_nodes) != 1:
10118       raise errors.OpPrereqError("The instance has a strange layout,"
10119                                  " expected one secondary but found %d" %
10120                                  len(instance.secondary_nodes),
10121                                  errors.ECODE_FAULT)
10122
10123     if not self.delay_iallocator:
10124       self._CheckPrereq2()
10125
10126   def _CheckPrereq2(self):
10127     """Check prerequisites, second part.
10128
10129     This function should always be part of CheckPrereq. It was separated and is
10130     now called from Exec because during node evacuation iallocator was only
10131     called with an unmodified cluster model, not taking planned changes into
10132     account.
10133
10134     """
10135     instance = self.instance
10136     secondary_node = instance.secondary_nodes[0]
10137
10138     if self.iallocator_name is None:
10139       remote_node = self.remote_node
10140     else:
10141       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10142                                        instance.name, instance.secondary_nodes)
10143
10144     if remote_node is None:
10145       self.remote_node_info = None
10146     else:
10147       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10148              "Remote node '%s' is not locked" % remote_node
10149
10150       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10151       assert self.remote_node_info is not None, \
10152         "Cannot retrieve locked node %s" % remote_node
10153
10154     if remote_node == self.instance.primary_node:
10155       raise errors.OpPrereqError("The specified node is the primary node of"
10156                                  " the instance", errors.ECODE_INVAL)
10157
10158     if remote_node == secondary_node:
10159       raise errors.OpPrereqError("The specified node is already the"
10160                                  " secondary node of the instance",
10161                                  errors.ECODE_INVAL)
10162
10163     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10164                                     constants.REPLACE_DISK_CHG):
10165       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10166                                  errors.ECODE_INVAL)
10167
10168     if self.mode == constants.REPLACE_DISK_AUTO:
10169       if not self._CheckDisksActivated(instance):
10170         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10171                                    " first" % self.instance_name,
10172                                    errors.ECODE_STATE)
10173       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10174       faulty_secondary = self._FindFaultyDisks(secondary_node)
10175
10176       if faulty_primary and faulty_secondary:
10177         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10178                                    " one node and can not be repaired"
10179                                    " automatically" % self.instance_name,
10180                                    errors.ECODE_STATE)
10181
10182       if faulty_primary:
10183         self.disks = faulty_primary
10184         self.target_node = instance.primary_node
10185         self.other_node = secondary_node
10186         check_nodes = [self.target_node, self.other_node]
10187       elif faulty_secondary:
10188         self.disks = faulty_secondary
10189         self.target_node = secondary_node
10190         self.other_node = instance.primary_node
10191         check_nodes = [self.target_node, self.other_node]
10192       else:
10193         self.disks = []
10194         check_nodes = []
10195
10196     else:
10197       # Non-automatic modes
10198       if self.mode == constants.REPLACE_DISK_PRI:
10199         self.target_node = instance.primary_node
10200         self.other_node = secondary_node
10201         check_nodes = [self.target_node, self.other_node]
10202
10203       elif self.mode == constants.REPLACE_DISK_SEC:
10204         self.target_node = secondary_node
10205         self.other_node = instance.primary_node
10206         check_nodes = [self.target_node, self.other_node]
10207
10208       elif self.mode == constants.REPLACE_DISK_CHG:
10209         self.new_node = remote_node
10210         self.other_node = instance.primary_node
10211         self.target_node = secondary_node
10212         check_nodes = [self.new_node, self.other_node]
10213
10214         _CheckNodeNotDrained(self.lu, remote_node)
10215         _CheckNodeVmCapable(self.lu, remote_node)
10216
10217         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10218         assert old_node_info is not None
10219         if old_node_info.offline and not self.early_release:
10220           # doesn't make sense to delay the release
10221           self.early_release = True
10222           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10223                           " early-release mode", secondary_node)
10224
10225       else:
10226         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10227                                      self.mode)
10228
10229       # If not specified all disks should be replaced
10230       if not self.disks:
10231         self.disks = range(len(self.instance.disks))
10232
10233     # TODO: compute disk parameters
10234     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10235     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10236     if primary_node_info.group != secondary_node_info.group:
10237       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10238                       " different node groups; the disk parameters of the"
10239                       " primary node's group will be applied.")
10240
10241     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10242
10243     for node in check_nodes:
10244       _CheckNodeOnline(self.lu, node)
10245
10246     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10247                                                           self.other_node,
10248                                                           self.target_node]
10249                               if node_name is not None)
10250
10251     # Release unneeded node and node resource locks
10252     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10253     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10254
10255     # Release any owned node group
10256     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10257       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10258
10259     # Check whether disks are valid
10260     for disk_idx in self.disks:
10261       instance.FindDisk(disk_idx)
10262
10263     # Get secondary node IP addresses
10264     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10265                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10266
10267   def Exec(self, feedback_fn):
10268     """Execute disk replacement.
10269
10270     This dispatches the disk replacement to the appropriate handler.
10271
10272     """
10273     if self.delay_iallocator:
10274       self._CheckPrereq2()
10275
10276     if __debug__:
10277       # Verify owned locks before starting operation
10278       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10279       assert set(owned_nodes) == set(self.node_secondary_ip), \
10280           ("Incorrect node locks, owning %s, expected %s" %
10281            (owned_nodes, self.node_secondary_ip.keys()))
10282       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10283               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10284
10285       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10286       assert list(owned_instances) == [self.instance_name], \
10287           "Instance '%s' not locked" % self.instance_name
10288
10289       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10290           "Should not own any node group lock at this point"
10291
10292     if not self.disks:
10293       feedback_fn("No disks need replacement")
10294       return
10295
10296     feedback_fn("Replacing disk(s) %s for %s" %
10297                 (utils.CommaJoin(self.disks), self.instance.name))
10298
10299     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10300
10301     # Activate the instance disks if we're replacing them on a down instance
10302     if activate_disks:
10303       _StartInstanceDisks(self.lu, self.instance, True)
10304
10305     try:
10306       # Should we replace the secondary node?
10307       if self.new_node is not None:
10308         fn = self._ExecDrbd8Secondary
10309       else:
10310         fn = self._ExecDrbd8DiskOnly
10311
10312       result = fn(feedback_fn)
10313     finally:
10314       # Deactivate the instance disks if we're replacing them on a
10315       # down instance
10316       if activate_disks:
10317         _SafeShutdownInstanceDisks(self.lu, self.instance)
10318
10319     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10320
10321     if __debug__:
10322       # Verify owned locks
10323       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10324       nodes = frozenset(self.node_secondary_ip)
10325       assert ((self.early_release and not owned_nodes) or
10326               (not self.early_release and not (set(owned_nodes) - nodes))), \
10327         ("Not owning the correct locks, early_release=%s, owned=%r,"
10328          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10329
10330     return result
10331
10332   def _CheckVolumeGroup(self, nodes):
10333     self.lu.LogInfo("Checking volume groups")
10334
10335     vgname = self.cfg.GetVGName()
10336
10337     # Make sure volume group exists on all involved nodes
10338     results = self.rpc.call_vg_list(nodes)
10339     if not results:
10340       raise errors.OpExecError("Can't list volume groups on the nodes")
10341
10342     for node in nodes:
10343       res = results[node]
10344       res.Raise("Error checking node %s" % node)
10345       if vgname not in res.payload:
10346         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10347                                  (vgname, node))
10348
10349   def _CheckDisksExistence(self, nodes):
10350     # Check disk existence
10351     for idx, dev in enumerate(self.instance.disks):
10352       if idx not in self.disks:
10353         continue
10354
10355       for node in nodes:
10356         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10357         self.cfg.SetDiskID(dev, node)
10358
10359         result = self.rpc.call_blockdev_find(node, dev)
10360
10361         msg = result.fail_msg
10362         if msg or not result.payload:
10363           if not msg:
10364             msg = "disk not found"
10365           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10366                                    (idx, node, msg))
10367
10368   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10369     for idx, dev in enumerate(self.instance.disks):
10370       if idx not in self.disks:
10371         continue
10372
10373       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10374                       (idx, node_name))
10375
10376       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10377                                    ldisk=ldisk):
10378         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10379                                  " replace disks for instance %s" %
10380                                  (node_name, self.instance.name))
10381
10382   def _CreateNewStorage(self, node_name):
10383     """Create new storage on the primary or secondary node.
10384
10385     This is only used for same-node replaces, not for changing the
10386     secondary node, hence we don't want to modify the existing disk.
10387
10388     """
10389     iv_names = {}
10390
10391     for idx, dev in enumerate(self.instance.disks):
10392       if idx not in self.disks:
10393         continue
10394
10395       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10396
10397       self.cfg.SetDiskID(dev, node_name)
10398
10399       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10400       names = _GenerateUniqueNames(self.lu, lv_names)
10401
10402       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10403
10404       vg_data = dev.children[0].logical_id[0]
10405       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10406                              logical_id=(vg_data, names[0]), params=data_p)
10407       vg_meta = dev.children[1].logical_id[0]
10408       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10409                              logical_id=(vg_meta, names[1]), params=meta_p)
10410
10411       new_lvs = [lv_data, lv_meta]
10412       old_lvs = [child.Copy() for child in dev.children]
10413       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10414
10415       # we pass force_create=True to force the LVM creation
10416       for new_lv in new_lvs:
10417         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10418                         _GetInstanceInfoText(self.instance), False)
10419
10420     return iv_names
10421
10422   def _CheckDevices(self, node_name, iv_names):
10423     for name, (dev, _, _) in iv_names.iteritems():
10424       self.cfg.SetDiskID(dev, node_name)
10425
10426       result = self.rpc.call_blockdev_find(node_name, dev)
10427
10428       msg = result.fail_msg
10429       if msg or not result.payload:
10430         if not msg:
10431           msg = "disk not found"
10432         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10433                                  (name, msg))
10434
10435       if result.payload.is_degraded:
10436         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10437
10438   def _RemoveOldStorage(self, node_name, iv_names):
10439     for name, (_, old_lvs, _) in iv_names.iteritems():
10440       self.lu.LogInfo("Remove logical volumes for %s" % name)
10441
10442       for lv in old_lvs:
10443         self.cfg.SetDiskID(lv, node_name)
10444
10445         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10446         if msg:
10447           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10448                              hint="remove unused LVs manually")
10449
10450   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10451     """Replace a disk on the primary or secondary for DRBD 8.
10452
10453     The algorithm for replace is quite complicated:
10454
10455       1. for each disk to be replaced:
10456
10457         1. create new LVs on the target node with unique names
10458         1. detach old LVs from the drbd device
10459         1. rename old LVs to name_replaced.<time_t>
10460         1. rename new LVs to old LVs
10461         1. attach the new LVs (with the old names now) to the drbd device
10462
10463       1. wait for sync across all devices
10464
10465       1. for each modified disk:
10466
10467         1. remove old LVs (which have the name name_replaces.<time_t>)
10468
10469     Failures are not very well handled.
10470
10471     """
10472     steps_total = 6
10473
10474     # Step: check device activation
10475     self.lu.LogStep(1, steps_total, "Check device existence")
10476     self._CheckDisksExistence([self.other_node, self.target_node])
10477     self._CheckVolumeGroup([self.target_node, self.other_node])
10478
10479     # Step: check other node consistency
10480     self.lu.LogStep(2, steps_total, "Check peer consistency")
10481     self._CheckDisksConsistency(self.other_node,
10482                                 self.other_node == self.instance.primary_node,
10483                                 False)
10484
10485     # Step: create new storage
10486     self.lu.LogStep(3, steps_total, "Allocate new storage")
10487     iv_names = self._CreateNewStorage(self.target_node)
10488
10489     # Step: for each lv, detach+rename*2+attach
10490     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10491     for dev, old_lvs, new_lvs in iv_names.itervalues():
10492       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10493
10494       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10495                                                      old_lvs)
10496       result.Raise("Can't detach drbd from local storage on node"
10497                    " %s for device %s" % (self.target_node, dev.iv_name))
10498       #dev.children = []
10499       #cfg.Update(instance)
10500
10501       # ok, we created the new LVs, so now we know we have the needed
10502       # storage; as such, we proceed on the target node to rename
10503       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10504       # using the assumption that logical_id == physical_id (which in
10505       # turn is the unique_id on that node)
10506
10507       # FIXME(iustin): use a better name for the replaced LVs
10508       temp_suffix = int(time.time())
10509       ren_fn = lambda d, suff: (d.physical_id[0],
10510                                 d.physical_id[1] + "_replaced-%s" % suff)
10511
10512       # Build the rename list based on what LVs exist on the node
10513       rename_old_to_new = []
10514       for to_ren in old_lvs:
10515         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10516         if not result.fail_msg and result.payload:
10517           # device exists
10518           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10519
10520       self.lu.LogInfo("Renaming the old LVs on the target node")
10521       result = self.rpc.call_blockdev_rename(self.target_node,
10522                                              rename_old_to_new)
10523       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10524
10525       # Now we rename the new LVs to the old LVs
10526       self.lu.LogInfo("Renaming the new LVs on the target node")
10527       rename_new_to_old = [(new, old.physical_id)
10528                            for old, new in zip(old_lvs, new_lvs)]
10529       result = self.rpc.call_blockdev_rename(self.target_node,
10530                                              rename_new_to_old)
10531       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10532
10533       # Intermediate steps of in memory modifications
10534       for old, new in zip(old_lvs, new_lvs):
10535         new.logical_id = old.logical_id
10536         self.cfg.SetDiskID(new, self.target_node)
10537
10538       # We need to modify old_lvs so that removal later removes the
10539       # right LVs, not the newly added ones; note that old_lvs is a
10540       # copy here
10541       for disk in old_lvs:
10542         disk.logical_id = ren_fn(disk, temp_suffix)
10543         self.cfg.SetDiskID(disk, self.target_node)
10544
10545       # Now that the new lvs have the old name, we can add them to the device
10546       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10547       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10548                                                   new_lvs)
10549       msg = result.fail_msg
10550       if msg:
10551         for new_lv in new_lvs:
10552           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10553                                                new_lv).fail_msg
10554           if msg2:
10555             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10556                                hint=("cleanup manually the unused logical"
10557                                      "volumes"))
10558         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10559
10560     cstep = itertools.count(5)
10561
10562     if self.early_release:
10563       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10564       self._RemoveOldStorage(self.target_node, iv_names)
10565       # TODO: Check if releasing locks early still makes sense
10566       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10567     else:
10568       # Release all resource locks except those used by the instance
10569       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10570                     keep=self.node_secondary_ip.keys())
10571
10572     # Release all node locks while waiting for sync
10573     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10574
10575     # TODO: Can the instance lock be downgraded here? Take the optional disk
10576     # shutdown in the caller into consideration.
10577
10578     # Wait for sync
10579     # This can fail as the old devices are degraded and _WaitForSync
10580     # does a combined result over all disks, so we don't check its return value
10581     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10582     _WaitForSync(self.lu, self.instance)
10583
10584     # Check all devices manually
10585     self._CheckDevices(self.instance.primary_node, iv_names)
10586
10587     # Step: remove old storage
10588     if not self.early_release:
10589       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10590       self._RemoveOldStorage(self.target_node, iv_names)
10591
10592   def _ExecDrbd8Secondary(self, feedback_fn):
10593     """Replace the secondary node for DRBD 8.
10594
10595     The algorithm for replace is quite complicated:
10596       - for all disks of the instance:
10597         - create new LVs on the new node with same names
10598         - shutdown the drbd device on the old secondary
10599         - disconnect the drbd network on the primary
10600         - create the drbd device on the new secondary
10601         - network attach the drbd on the primary, using an artifice:
10602           the drbd code for Attach() will connect to the network if it
10603           finds a device which is connected to the good local disks but
10604           not network enabled
10605       - wait for sync across all devices
10606       - remove all disks from the old secondary
10607
10608     Failures are not very well handled.
10609
10610     """
10611     steps_total = 6
10612
10613     pnode = self.instance.primary_node
10614
10615     # Step: check device activation
10616     self.lu.LogStep(1, steps_total, "Check device existence")
10617     self._CheckDisksExistence([self.instance.primary_node])
10618     self._CheckVolumeGroup([self.instance.primary_node])
10619
10620     # Step: check other node consistency
10621     self.lu.LogStep(2, steps_total, "Check peer consistency")
10622     self._CheckDisksConsistency(self.instance.primary_node, True, True)
10623
10624     # Step: create new storage
10625     self.lu.LogStep(3, steps_total, "Allocate new storage")
10626     for idx, dev in enumerate(self.instance.disks):
10627       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10628                       (self.new_node, idx))
10629       # we pass force_create=True to force LVM creation
10630       for new_lv in dev.children:
10631         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10632                         _GetInstanceInfoText(self.instance), False)
10633
10634     # Step 4: dbrd minors and drbd setups changes
10635     # after this, we must manually remove the drbd minors on both the
10636     # error and the success paths
10637     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10638     minors = self.cfg.AllocateDRBDMinor([self.new_node
10639                                          for dev in self.instance.disks],
10640                                         self.instance.name)
10641     logging.debug("Allocated minors %r", minors)
10642
10643     iv_names = {}
10644     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10645       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10646                       (self.new_node, idx))
10647       # create new devices on new_node; note that we create two IDs:
10648       # one without port, so the drbd will be activated without
10649       # networking information on the new node at this stage, and one
10650       # with network, for the latter activation in step 4
10651       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10652       if self.instance.primary_node == o_node1:
10653         p_minor = o_minor1
10654       else:
10655         assert self.instance.primary_node == o_node2, "Three-node instance?"
10656         p_minor = o_minor2
10657
10658       new_alone_id = (self.instance.primary_node, self.new_node, None,
10659                       p_minor, new_minor, o_secret)
10660       new_net_id = (self.instance.primary_node, self.new_node, o_port,
10661                     p_minor, new_minor, o_secret)
10662
10663       iv_names[idx] = (dev, dev.children, new_net_id)
10664       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10665                     new_net_id)
10666       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10667       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10668                               logical_id=new_alone_id,
10669                               children=dev.children,
10670                               size=dev.size,
10671                               params=drbd_params)
10672       try:
10673         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10674                               _GetInstanceInfoText(self.instance), False)
10675       except errors.GenericError:
10676         self.cfg.ReleaseDRBDMinors(self.instance.name)
10677         raise
10678
10679     # We have new devices, shutdown the drbd on the old secondary
10680     for idx, dev in enumerate(self.instance.disks):
10681       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10682       self.cfg.SetDiskID(dev, self.target_node)
10683       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10684       if msg:
10685         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10686                            "node: %s" % (idx, msg),
10687                            hint=("Please cleanup this device manually as"
10688                                  " soon as possible"))
10689
10690     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10691     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10692                                                self.instance.disks)[pnode]
10693
10694     msg = result.fail_msg
10695     if msg:
10696       # detaches didn't succeed (unlikely)
10697       self.cfg.ReleaseDRBDMinors(self.instance.name)
10698       raise errors.OpExecError("Can't detach the disks from the network on"
10699                                " old node: %s" % (msg,))
10700
10701     # if we managed to detach at least one, we update all the disks of
10702     # the instance to point to the new secondary
10703     self.lu.LogInfo("Updating instance configuration")
10704     for dev, _, new_logical_id in iv_names.itervalues():
10705       dev.logical_id = new_logical_id
10706       self.cfg.SetDiskID(dev, self.instance.primary_node)
10707
10708     self.cfg.Update(self.instance, feedback_fn)
10709
10710     # Release all node locks (the configuration has been updated)
10711     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10712
10713     # and now perform the drbd attach
10714     self.lu.LogInfo("Attaching primary drbds to new secondary"
10715                     " (standalone => connected)")
10716     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10717                                             self.new_node],
10718                                            self.node_secondary_ip,
10719                                            self.instance.disks,
10720                                            self.instance.name,
10721                                            False)
10722     for to_node, to_result in result.items():
10723       msg = to_result.fail_msg
10724       if msg:
10725         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10726                            to_node, msg,
10727                            hint=("please do a gnt-instance info to see the"
10728                                  " status of disks"))
10729
10730     cstep = itertools.count(5)
10731
10732     if self.early_release:
10733       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10734       self._RemoveOldStorage(self.target_node, iv_names)
10735       # TODO: Check if releasing locks early still makes sense
10736       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10737     else:
10738       # Release all resource locks except those used by the instance
10739       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10740                     keep=self.node_secondary_ip.keys())
10741
10742     # TODO: Can the instance lock be downgraded here? Take the optional disk
10743     # shutdown in the caller into consideration.
10744
10745     # Wait for sync
10746     # This can fail as the old devices are degraded and _WaitForSync
10747     # does a combined result over all disks, so we don't check its return value
10748     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10749     _WaitForSync(self.lu, self.instance)
10750
10751     # Check all devices manually
10752     self._CheckDevices(self.instance.primary_node, iv_names)
10753
10754     # Step: remove old storage
10755     if not self.early_release:
10756       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10757       self._RemoveOldStorage(self.target_node, iv_names)
10758
10759
10760 class LURepairNodeStorage(NoHooksLU):
10761   """Repairs the volume group on a node.
10762
10763   """
10764   REQ_BGL = False
10765
10766   def CheckArguments(self):
10767     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10768
10769     storage_type = self.op.storage_type
10770
10771     if (constants.SO_FIX_CONSISTENCY not in
10772         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10773       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10774                                  " repaired" % storage_type,
10775                                  errors.ECODE_INVAL)
10776
10777   def ExpandNames(self):
10778     self.needed_locks = {
10779       locking.LEVEL_NODE: [self.op.node_name],
10780       }
10781
10782   def _CheckFaultyDisks(self, instance, node_name):
10783     """Ensure faulty disks abort the opcode or at least warn."""
10784     try:
10785       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10786                                   node_name, True):
10787         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10788                                    " node '%s'" % (instance.name, node_name),
10789                                    errors.ECODE_STATE)
10790     except errors.OpPrereqError, err:
10791       if self.op.ignore_consistency:
10792         self.proc.LogWarning(str(err.args[0]))
10793       else:
10794         raise
10795
10796   def CheckPrereq(self):
10797     """Check prerequisites.
10798
10799     """
10800     # Check whether any instance on this node has faulty disks
10801     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10802       if inst.admin_state != constants.ADMINST_UP:
10803         continue
10804       check_nodes = set(inst.all_nodes)
10805       check_nodes.discard(self.op.node_name)
10806       for inst_node_name in check_nodes:
10807         self._CheckFaultyDisks(inst, inst_node_name)
10808
10809   def Exec(self, feedback_fn):
10810     feedback_fn("Repairing storage unit '%s' on %s ..." %
10811                 (self.op.name, self.op.node_name))
10812
10813     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10814     result = self.rpc.call_storage_execute(self.op.node_name,
10815                                            self.op.storage_type, st_args,
10816                                            self.op.name,
10817                                            constants.SO_FIX_CONSISTENCY)
10818     result.Raise("Failed to repair storage unit '%s' on %s" %
10819                  (self.op.name, self.op.node_name))
10820
10821
10822 class LUNodeEvacuate(NoHooksLU):
10823   """Evacuates instances off a list of nodes.
10824
10825   """
10826   REQ_BGL = False
10827
10828   _MODE2IALLOCATOR = {
10829     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10830     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10831     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10832     }
10833   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10834   assert (frozenset(_MODE2IALLOCATOR.values()) ==
10835           constants.IALLOCATOR_NEVAC_MODES)
10836
10837   def CheckArguments(self):
10838     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10839
10840   def ExpandNames(self):
10841     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10842
10843     if self.op.remote_node is not None:
10844       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10845       assert self.op.remote_node
10846
10847       if self.op.remote_node == self.op.node_name:
10848         raise errors.OpPrereqError("Can not use evacuated node as a new"
10849                                    " secondary node", errors.ECODE_INVAL)
10850
10851       if self.op.mode != constants.NODE_EVAC_SEC:
10852         raise errors.OpPrereqError("Without the use of an iallocator only"
10853                                    " secondary instances can be evacuated",
10854                                    errors.ECODE_INVAL)
10855
10856     # Declare locks
10857     self.share_locks = _ShareAll()
10858     self.needed_locks = {
10859       locking.LEVEL_INSTANCE: [],
10860       locking.LEVEL_NODEGROUP: [],
10861       locking.LEVEL_NODE: [],
10862       }
10863
10864     # Determine nodes (via group) optimistically, needs verification once locks
10865     # have been acquired
10866     self.lock_nodes = self._DetermineNodes()
10867
10868   def _DetermineNodes(self):
10869     """Gets the list of nodes to operate on.
10870
10871     """
10872     if self.op.remote_node is None:
10873       # Iallocator will choose any node(s) in the same group
10874       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10875     else:
10876       group_nodes = frozenset([self.op.remote_node])
10877
10878     # Determine nodes to be locked
10879     return set([self.op.node_name]) | group_nodes
10880
10881   def _DetermineInstances(self):
10882     """Builds list of instances to operate on.
10883
10884     """
10885     assert self.op.mode in constants.NODE_EVAC_MODES
10886
10887     if self.op.mode == constants.NODE_EVAC_PRI:
10888       # Primary instances only
10889       inst_fn = _GetNodePrimaryInstances
10890       assert self.op.remote_node is None, \
10891         "Evacuating primary instances requires iallocator"
10892     elif self.op.mode == constants.NODE_EVAC_SEC:
10893       # Secondary instances only
10894       inst_fn = _GetNodeSecondaryInstances
10895     else:
10896       # All instances
10897       assert self.op.mode == constants.NODE_EVAC_ALL
10898       inst_fn = _GetNodeInstances
10899       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10900       # per instance
10901       raise errors.OpPrereqError("Due to an issue with the iallocator"
10902                                  " interface it is not possible to evacuate"
10903                                  " all instances at once; specify explicitly"
10904                                  " whether to evacuate primary or secondary"
10905                                  " instances",
10906                                  errors.ECODE_INVAL)
10907
10908     return inst_fn(self.cfg, self.op.node_name)
10909
10910   def DeclareLocks(self, level):
10911     if level == locking.LEVEL_INSTANCE:
10912       # Lock instances optimistically, needs verification once node and group
10913       # locks have been acquired
10914       self.needed_locks[locking.LEVEL_INSTANCE] = \
10915         set(i.name for i in self._DetermineInstances())
10916
10917     elif level == locking.LEVEL_NODEGROUP:
10918       # Lock node groups for all potential target nodes optimistically, needs
10919       # verification once nodes have been acquired
10920       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10921         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10922
10923     elif level == locking.LEVEL_NODE:
10924       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10925
10926   def CheckPrereq(self):
10927     # Verify locks
10928     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10929     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10930     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10931
10932     need_nodes = self._DetermineNodes()
10933
10934     if not owned_nodes.issuperset(need_nodes):
10935       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10936                                  " locks were acquired, current nodes are"
10937                                  " are '%s', used to be '%s'; retry the"
10938                                  " operation" %
10939                                  (self.op.node_name,
10940                                   utils.CommaJoin(need_nodes),
10941                                   utils.CommaJoin(owned_nodes)),
10942                                  errors.ECODE_STATE)
10943
10944     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10945     if owned_groups != wanted_groups:
10946       raise errors.OpExecError("Node groups changed since locks were acquired,"
10947                                " current groups are '%s', used to be '%s';"
10948                                " retry the operation" %
10949                                (utils.CommaJoin(wanted_groups),
10950                                 utils.CommaJoin(owned_groups)))
10951
10952     # Determine affected instances
10953     self.instances = self._DetermineInstances()
10954     self.instance_names = [i.name for i in self.instances]
10955
10956     if set(self.instance_names) != owned_instances:
10957       raise errors.OpExecError("Instances on node '%s' changed since locks"
10958                                " were acquired, current instances are '%s',"
10959                                " used to be '%s'; retry the operation" %
10960                                (self.op.node_name,
10961                                 utils.CommaJoin(self.instance_names),
10962                                 utils.CommaJoin(owned_instances)))
10963
10964     if self.instance_names:
10965       self.LogInfo("Evacuating instances from node '%s': %s",
10966                    self.op.node_name,
10967                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10968     else:
10969       self.LogInfo("No instances to evacuate from node '%s'",
10970                    self.op.node_name)
10971
10972     if self.op.remote_node is not None:
10973       for i in self.instances:
10974         if i.primary_node == self.op.remote_node:
10975           raise errors.OpPrereqError("Node %s is the primary node of"
10976                                      " instance %s, cannot use it as"
10977                                      " secondary" %
10978                                      (self.op.remote_node, i.name),
10979                                      errors.ECODE_INVAL)
10980
10981   def Exec(self, feedback_fn):
10982     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10983
10984     if not self.instance_names:
10985       # No instances to evacuate
10986       jobs = []
10987
10988     elif self.op.iallocator is not None:
10989       # TODO: Implement relocation to other group
10990       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10991                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10992                        instances=list(self.instance_names))
10993
10994       ial.Run(self.op.iallocator)
10995
10996       if not ial.success:
10997         raise errors.OpPrereqError("Can't compute node evacuation using"
10998                                    " iallocator '%s': %s" %
10999                                    (self.op.iallocator, ial.info),
11000                                    errors.ECODE_NORES)
11001
11002       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11003
11004     elif self.op.remote_node is not None:
11005       assert self.op.mode == constants.NODE_EVAC_SEC
11006       jobs = [
11007         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11008                                         remote_node=self.op.remote_node,
11009                                         disks=[],
11010                                         mode=constants.REPLACE_DISK_CHG,
11011                                         early_release=self.op.early_release)]
11012         for instance_name in self.instance_names
11013         ]
11014
11015     else:
11016       raise errors.ProgrammerError("No iallocator or remote node")
11017
11018     return ResultWithJobs(jobs)
11019
11020
11021 def _SetOpEarlyRelease(early_release, op):
11022   """Sets C{early_release} flag on opcodes if available.
11023
11024   """
11025   try:
11026     op.early_release = early_release
11027   except AttributeError:
11028     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11029
11030   return op
11031
11032
11033 def _NodeEvacDest(use_nodes, group, nodes):
11034   """Returns group or nodes depending on caller's choice.
11035
11036   """
11037   if use_nodes:
11038     return utils.CommaJoin(nodes)
11039   else:
11040     return group
11041
11042
11043 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11044   """Unpacks the result of change-group and node-evacuate iallocator requests.
11045
11046   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11047   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11048
11049   @type lu: L{LogicalUnit}
11050   @param lu: Logical unit instance
11051   @type alloc_result: tuple/list
11052   @param alloc_result: Result from iallocator
11053   @type early_release: bool
11054   @param early_release: Whether to release locks early if possible
11055   @type use_nodes: bool
11056   @param use_nodes: Whether to display node names instead of groups
11057
11058   """
11059   (moved, failed, jobs) = alloc_result
11060
11061   if failed:
11062     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11063                                  for (name, reason) in failed)
11064     lu.LogWarning("Unable to evacuate instances %s", failreason)
11065     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11066
11067   if moved:
11068     lu.LogInfo("Instances to be moved: %s",
11069                utils.CommaJoin("%s (to %s)" %
11070                                (name, _NodeEvacDest(use_nodes, group, nodes))
11071                                for (name, group, nodes) in moved))
11072
11073   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11074               map(opcodes.OpCode.LoadOpCode, ops))
11075           for ops in jobs]
11076
11077
11078 class LUInstanceGrowDisk(LogicalUnit):
11079   """Grow a disk of an instance.
11080
11081   """
11082   HPATH = "disk-grow"
11083   HTYPE = constants.HTYPE_INSTANCE
11084   REQ_BGL = False
11085
11086   def ExpandNames(self):
11087     self._ExpandAndLockInstance()
11088     self.needed_locks[locking.LEVEL_NODE] = []
11089     self.needed_locks[locking.LEVEL_NODE_RES] = []
11090     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11091
11092   def DeclareLocks(self, level):
11093     if level == locking.LEVEL_NODE:
11094       self._LockInstancesNodes()
11095     elif level == locking.LEVEL_NODE_RES:
11096       # Copy node locks
11097       self.needed_locks[locking.LEVEL_NODE_RES] = \
11098         self.needed_locks[locking.LEVEL_NODE][:]
11099
11100   def BuildHooksEnv(self):
11101     """Build hooks env.
11102
11103     This runs on the master, the primary and all the secondaries.
11104
11105     """
11106     env = {
11107       "DISK": self.op.disk,
11108       "AMOUNT": self.op.amount,
11109       }
11110     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11111     return env
11112
11113   def BuildHooksNodes(self):
11114     """Build hooks nodes.
11115
11116     """
11117     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11118     return (nl, nl)
11119
11120   def CheckPrereq(self):
11121     """Check prerequisites.
11122
11123     This checks that the instance is in the cluster.
11124
11125     """
11126     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11127     assert instance is not None, \
11128       "Cannot retrieve locked instance %s" % self.op.instance_name
11129     nodenames = list(instance.all_nodes)
11130     for node in nodenames:
11131       _CheckNodeOnline(self, node)
11132
11133     self.instance = instance
11134
11135     if instance.disk_template not in constants.DTS_GROWABLE:
11136       raise errors.OpPrereqError("Instance's disk layout does not support"
11137                                  " growing", errors.ECODE_INVAL)
11138
11139     self.disk = instance.FindDisk(self.op.disk)
11140
11141     if instance.disk_template not in (constants.DT_FILE,
11142                                       constants.DT_SHARED_FILE):
11143       # TODO: check the free disk space for file, when that feature will be
11144       # supported
11145       _CheckNodesFreeDiskPerVG(self, nodenames,
11146                                self.disk.ComputeGrowth(self.op.amount))
11147
11148   def Exec(self, feedback_fn):
11149     """Execute disk grow.
11150
11151     """
11152     instance = self.instance
11153     disk = self.disk
11154
11155     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11156     assert (self.owned_locks(locking.LEVEL_NODE) ==
11157             self.owned_locks(locking.LEVEL_NODE_RES))
11158
11159     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11160     if not disks_ok:
11161       raise errors.OpExecError("Cannot activate block device to grow")
11162
11163     feedback_fn("Growing disk %s of instance '%s' by %s" %
11164                 (self.op.disk, instance.name,
11165                  utils.FormatUnit(self.op.amount, "h")))
11166
11167     # First run all grow ops in dry-run mode
11168     for node in instance.all_nodes:
11169       self.cfg.SetDiskID(disk, node)
11170       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11171       result.Raise("Grow request failed to node %s" % node)
11172
11173     # We know that (as far as we can test) operations across different
11174     # nodes will succeed, time to run it for real
11175     for node in instance.all_nodes:
11176       self.cfg.SetDiskID(disk, node)
11177       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11178       result.Raise("Grow request failed to node %s" % node)
11179
11180       # TODO: Rewrite code to work properly
11181       # DRBD goes into sync mode for a short amount of time after executing the
11182       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11183       # calling "resize" in sync mode fails. Sleeping for a short amount of
11184       # time is a work-around.
11185       time.sleep(5)
11186
11187     disk.RecordGrow(self.op.amount)
11188     self.cfg.Update(instance, feedback_fn)
11189
11190     # Changes have been recorded, release node lock
11191     _ReleaseLocks(self, locking.LEVEL_NODE)
11192
11193     # Downgrade lock while waiting for sync
11194     self.glm.downgrade(locking.LEVEL_INSTANCE)
11195
11196     if self.op.wait_for_sync:
11197       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11198       if disk_abort:
11199         self.proc.LogWarning("Disk sync-ing has not returned a good"
11200                              " status; please check the instance")
11201       if instance.admin_state != constants.ADMINST_UP:
11202         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11203     elif instance.admin_state != constants.ADMINST_UP:
11204       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11205                            " not supposed to be running because no wait for"
11206                            " sync mode was requested")
11207
11208     assert self.owned_locks(locking.LEVEL_NODE_RES)
11209     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11210
11211
11212 class LUInstanceQueryData(NoHooksLU):
11213   """Query runtime instance data.
11214
11215   """
11216   REQ_BGL = False
11217
11218   def ExpandNames(self):
11219     self.needed_locks = {}
11220
11221     # Use locking if requested or when non-static information is wanted
11222     if not (self.op.static or self.op.use_locking):
11223       self.LogWarning("Non-static data requested, locks need to be acquired")
11224       self.op.use_locking = True
11225
11226     if self.op.instances or not self.op.use_locking:
11227       # Expand instance names right here
11228       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11229     else:
11230       # Will use acquired locks
11231       self.wanted_names = None
11232
11233     if self.op.use_locking:
11234       self.share_locks = _ShareAll()
11235
11236       if self.wanted_names is None:
11237         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11238       else:
11239         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11240
11241       self.needed_locks[locking.LEVEL_NODE] = []
11242       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11243
11244   def DeclareLocks(self, level):
11245     if self.op.use_locking and level == locking.LEVEL_NODE:
11246       self._LockInstancesNodes()
11247
11248   def CheckPrereq(self):
11249     """Check prerequisites.
11250
11251     This only checks the optional instance list against the existing names.
11252
11253     """
11254     if self.wanted_names is None:
11255       assert self.op.use_locking, "Locking was not used"
11256       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11257
11258     self.wanted_instances = \
11259         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11260
11261   def _ComputeBlockdevStatus(self, node, instance_name, dev):
11262     """Returns the status of a block device
11263
11264     """
11265     if self.op.static or not node:
11266       return None
11267
11268     self.cfg.SetDiskID(dev, node)
11269
11270     result = self.rpc.call_blockdev_find(node, dev)
11271     if result.offline:
11272       return None
11273
11274     result.Raise("Can't compute disk status for %s" % instance_name)
11275
11276     status = result.payload
11277     if status is None:
11278       return None
11279
11280     return (status.dev_path, status.major, status.minor,
11281             status.sync_percent, status.estimated_time,
11282             status.is_degraded, status.ldisk_status)
11283
11284   def _ComputeDiskStatus(self, instance, snode, dev):
11285     """Compute block device status.
11286
11287     """
11288     if dev.dev_type in constants.LDS_DRBD:
11289       # we change the snode then (otherwise we use the one passed in)
11290       if dev.logical_id[0] == instance.primary_node:
11291         snode = dev.logical_id[1]
11292       else:
11293         snode = dev.logical_id[0]
11294
11295     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11296                                               instance.name, dev)
11297     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11298
11299     if dev.children:
11300       dev_children = map(compat.partial(self._ComputeDiskStatus,
11301                                         instance, snode),
11302                          dev.children)
11303     else:
11304       dev_children = []
11305
11306     return {
11307       "iv_name": dev.iv_name,
11308       "dev_type": dev.dev_type,
11309       "logical_id": dev.logical_id,
11310       "physical_id": dev.physical_id,
11311       "pstatus": dev_pstatus,
11312       "sstatus": dev_sstatus,
11313       "children": dev_children,
11314       "mode": dev.mode,
11315       "size": dev.size,
11316       }
11317
11318   def Exec(self, feedback_fn):
11319     """Gather and return data"""
11320     result = {}
11321
11322     cluster = self.cfg.GetClusterInfo()
11323
11324     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11325                                           for i in self.wanted_instances)
11326     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11327       if self.op.static or pnode.offline:
11328         remote_state = None
11329         if pnode.offline:
11330           self.LogWarning("Primary node %s is marked offline, returning static"
11331                           " information only for instance %s" %
11332                           (pnode.name, instance.name))
11333       else:
11334         remote_info = self.rpc.call_instance_info(instance.primary_node,
11335                                                   instance.name,
11336                                                   instance.hypervisor)
11337         remote_info.Raise("Error checking node %s" % instance.primary_node)
11338         remote_info = remote_info.payload
11339         if remote_info and "state" in remote_info:
11340           remote_state = "up"
11341         else:
11342           if instance.admin_state == constants.ADMINST_UP:
11343             remote_state = "down"
11344           else:
11345             remote_state = instance.admin_state
11346
11347       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11348                   instance.disks)
11349
11350       result[instance.name] = {
11351         "name": instance.name,
11352         "config_state": instance.admin_state,
11353         "run_state": remote_state,
11354         "pnode": instance.primary_node,
11355         "snodes": instance.secondary_nodes,
11356         "os": instance.os,
11357         # this happens to be the same format used for hooks
11358         "nics": _NICListToTuple(self, instance.nics),
11359         "disk_template": instance.disk_template,
11360         "disks": disks,
11361         "hypervisor": instance.hypervisor,
11362         "network_port": instance.network_port,
11363         "hv_instance": instance.hvparams,
11364         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11365         "be_instance": instance.beparams,
11366         "be_actual": cluster.FillBE(instance),
11367         "os_instance": instance.osparams,
11368         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11369         "serial_no": instance.serial_no,
11370         "mtime": instance.mtime,
11371         "ctime": instance.ctime,
11372         "uuid": instance.uuid,
11373         }
11374
11375     return result
11376
11377
11378 class LUInstanceSetParams(LogicalUnit):
11379   """Modifies an instances's parameters.
11380
11381   """
11382   HPATH = "instance-modify"
11383   HTYPE = constants.HTYPE_INSTANCE
11384   REQ_BGL = False
11385
11386   def CheckArguments(self):
11387     if not (self.op.nics or self.op.disks or self.op.disk_template or
11388             self.op.hvparams or self.op.beparams or self.op.os_name or
11389             self.op.online_inst or self.op.offline_inst):
11390       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11391
11392     if self.op.hvparams:
11393       _CheckGlobalHvParams(self.op.hvparams)
11394
11395     # Disk validation
11396     disk_addremove = 0
11397     for disk_op, disk_dict in self.op.disks:
11398       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11399       if disk_op == constants.DDM_REMOVE:
11400         disk_addremove += 1
11401         continue
11402       elif disk_op == constants.DDM_ADD:
11403         disk_addremove += 1
11404       else:
11405         if not isinstance(disk_op, int):
11406           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11407         if not isinstance(disk_dict, dict):
11408           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11409           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11410
11411       if disk_op == constants.DDM_ADD:
11412         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11413         if mode not in constants.DISK_ACCESS_SET:
11414           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11415                                      errors.ECODE_INVAL)
11416         size = disk_dict.get(constants.IDISK_SIZE, None)
11417         if size is None:
11418           raise errors.OpPrereqError("Required disk parameter size missing",
11419                                      errors.ECODE_INVAL)
11420         try:
11421           size = int(size)
11422         except (TypeError, ValueError), err:
11423           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11424                                      str(err), errors.ECODE_INVAL)
11425         disk_dict[constants.IDISK_SIZE] = size
11426       else:
11427         # modification of disk
11428         if constants.IDISK_SIZE in disk_dict:
11429           raise errors.OpPrereqError("Disk size change not possible, use"
11430                                      " grow-disk", errors.ECODE_INVAL)
11431
11432     if disk_addremove > 1:
11433       raise errors.OpPrereqError("Only one disk add or remove operation"
11434                                  " supported at a time", errors.ECODE_INVAL)
11435
11436     if self.op.disks and self.op.disk_template is not None:
11437       raise errors.OpPrereqError("Disk template conversion and other disk"
11438                                  " changes not supported at the same time",
11439                                  errors.ECODE_INVAL)
11440
11441     if (self.op.disk_template and
11442         self.op.disk_template in constants.DTS_INT_MIRROR and
11443         self.op.remote_node is None):
11444       raise errors.OpPrereqError("Changing the disk template to a mirrored"
11445                                  " one requires specifying a secondary node",
11446                                  errors.ECODE_INVAL)
11447
11448     # NIC validation
11449     nic_addremove = 0
11450     for nic_op, nic_dict in self.op.nics:
11451       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11452       if nic_op == constants.DDM_REMOVE:
11453         nic_addremove += 1
11454         continue
11455       elif nic_op == constants.DDM_ADD:
11456         nic_addremove += 1
11457       else:
11458         if not isinstance(nic_op, int):
11459           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11460         if not isinstance(nic_dict, dict):
11461           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11462           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11463
11464       # nic_dict should be a dict
11465       nic_ip = nic_dict.get(constants.INIC_IP, None)
11466       if nic_ip is not None:
11467         if nic_ip.lower() == constants.VALUE_NONE:
11468           nic_dict[constants.INIC_IP] = None
11469         else:
11470           if not netutils.IPAddress.IsValid(nic_ip):
11471             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11472                                        errors.ECODE_INVAL)
11473
11474       nic_bridge = nic_dict.get("bridge", None)
11475       nic_link = nic_dict.get(constants.INIC_LINK, None)
11476       if nic_bridge and nic_link:
11477         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11478                                    " at the same time", errors.ECODE_INVAL)
11479       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11480         nic_dict["bridge"] = None
11481       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11482         nic_dict[constants.INIC_LINK] = None
11483
11484       if nic_op == constants.DDM_ADD:
11485         nic_mac = nic_dict.get(constants.INIC_MAC, None)
11486         if nic_mac is None:
11487           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11488
11489       if constants.INIC_MAC in nic_dict:
11490         nic_mac = nic_dict[constants.INIC_MAC]
11491         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11492           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11493
11494         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11495           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11496                                      " modifying an existing nic",
11497                                      errors.ECODE_INVAL)
11498
11499     if nic_addremove > 1:
11500       raise errors.OpPrereqError("Only one NIC add or remove operation"
11501                                  " supported at a time", errors.ECODE_INVAL)
11502
11503   def ExpandNames(self):
11504     self._ExpandAndLockInstance()
11505     # Can't even acquire node locks in shared mode as upcoming changes in
11506     # Ganeti 2.6 will start to modify the node object on disk conversion
11507     self.needed_locks[locking.LEVEL_NODE] = []
11508     self.needed_locks[locking.LEVEL_NODE_RES] = []
11509     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11510
11511   def DeclareLocks(self, level):
11512     if level == locking.LEVEL_NODE:
11513       self._LockInstancesNodes()
11514       if self.op.disk_template and self.op.remote_node:
11515         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11516         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11517     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11518       # Copy node locks
11519       self.needed_locks[locking.LEVEL_NODE_RES] = \
11520         self.needed_locks[locking.LEVEL_NODE][:]
11521
11522   def BuildHooksEnv(self):
11523     """Build hooks env.
11524
11525     This runs on the master, primary and secondaries.
11526
11527     """
11528     args = dict()
11529     if constants.BE_MINMEM in self.be_new:
11530       args["minmem"] = self.be_new[constants.BE_MINMEM]
11531     if constants.BE_MAXMEM in self.be_new:
11532       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11533     if constants.BE_VCPUS in self.be_new:
11534       args["vcpus"] = self.be_new[constants.BE_VCPUS]
11535     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11536     # information at all.
11537     if self.op.nics:
11538       args["nics"] = []
11539       nic_override = dict(self.op.nics)
11540       for idx, nic in enumerate(self.instance.nics):
11541         if idx in nic_override:
11542           this_nic_override = nic_override[idx]
11543         else:
11544           this_nic_override = {}
11545         if constants.INIC_IP in this_nic_override:
11546           ip = this_nic_override[constants.INIC_IP]
11547         else:
11548           ip = nic.ip
11549         if constants.INIC_MAC in this_nic_override:
11550           mac = this_nic_override[constants.INIC_MAC]
11551         else:
11552           mac = nic.mac
11553         if idx in self.nic_pnew:
11554           nicparams = self.nic_pnew[idx]
11555         else:
11556           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11557         mode = nicparams[constants.NIC_MODE]
11558         link = nicparams[constants.NIC_LINK]
11559         args["nics"].append((ip, mac, mode, link))
11560       if constants.DDM_ADD in nic_override:
11561         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11562         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11563         nicparams = self.nic_pnew[constants.DDM_ADD]
11564         mode = nicparams[constants.NIC_MODE]
11565         link = nicparams[constants.NIC_LINK]
11566         args["nics"].append((ip, mac, mode, link))
11567       elif constants.DDM_REMOVE in nic_override:
11568         del args["nics"][-1]
11569
11570     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11571     if self.op.disk_template:
11572       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11573
11574     return env
11575
11576   def BuildHooksNodes(self):
11577     """Build hooks nodes.
11578
11579     """
11580     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11581     return (nl, nl)
11582
11583   def CheckPrereq(self):
11584     """Check prerequisites.
11585
11586     This only checks the instance list against the existing names.
11587
11588     """
11589     # checking the new params on the primary/secondary nodes
11590
11591     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11592     cluster = self.cluster = self.cfg.GetClusterInfo()
11593     assert self.instance is not None, \
11594       "Cannot retrieve locked instance %s" % self.op.instance_name
11595     pnode = instance.primary_node
11596     nodelist = list(instance.all_nodes)
11597     pnode_info = self.cfg.GetNodeInfo(pnode)
11598     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11599
11600     # OS change
11601     if self.op.os_name and not self.op.force:
11602       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11603                       self.op.force_variant)
11604       instance_os = self.op.os_name
11605     else:
11606       instance_os = instance.os
11607
11608     if self.op.disk_template:
11609       if instance.disk_template == self.op.disk_template:
11610         raise errors.OpPrereqError("Instance already has disk template %s" %
11611                                    instance.disk_template, errors.ECODE_INVAL)
11612
11613       if (instance.disk_template,
11614           self.op.disk_template) not in self._DISK_CONVERSIONS:
11615         raise errors.OpPrereqError("Unsupported disk template conversion from"
11616                                    " %s to %s" % (instance.disk_template,
11617                                                   self.op.disk_template),
11618                                    errors.ECODE_INVAL)
11619       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11620                           msg="cannot change disk template")
11621       if self.op.disk_template in constants.DTS_INT_MIRROR:
11622         if self.op.remote_node == pnode:
11623           raise errors.OpPrereqError("Given new secondary node %s is the same"
11624                                      " as the primary node of the instance" %
11625                                      self.op.remote_node, errors.ECODE_STATE)
11626         _CheckNodeOnline(self, self.op.remote_node)
11627         _CheckNodeNotDrained(self, self.op.remote_node)
11628         # FIXME: here we assume that the old instance type is DT_PLAIN
11629         assert instance.disk_template == constants.DT_PLAIN
11630         disks = [{constants.IDISK_SIZE: d.size,
11631                   constants.IDISK_VG: d.logical_id[0]}
11632                  for d in instance.disks]
11633         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11634         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11635
11636         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11637         if pnode_info.group != snode_info.group:
11638           self.LogWarning("The primary and secondary nodes are in two"
11639                           " different node groups; the disk parameters"
11640                           " from the first disk's node group will be"
11641                           " used")
11642
11643     # hvparams processing
11644     if self.op.hvparams:
11645       hv_type = instance.hypervisor
11646       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11647       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11648       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11649
11650       # local check
11651       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11652       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11653       self.hv_proposed = self.hv_new = hv_new # the new actual values
11654       self.hv_inst = i_hvdict # the new dict (without defaults)
11655     else:
11656       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11657                                               instance.hvparams)
11658       self.hv_new = self.hv_inst = {}
11659
11660     # beparams processing
11661     if self.op.beparams:
11662       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11663                                    use_none=True)
11664       objects.UpgradeBeParams(i_bedict)
11665       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11666       be_new = cluster.SimpleFillBE(i_bedict)
11667       self.be_proposed = self.be_new = be_new # the new actual values
11668       self.be_inst = i_bedict # the new dict (without defaults)
11669     else:
11670       self.be_new = self.be_inst = {}
11671       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11672     be_old = cluster.FillBE(instance)
11673
11674     # CPU param validation -- checking every time a paramtere is
11675     # changed to cover all cases where either CPU mask or vcpus have
11676     # changed
11677     if (constants.BE_VCPUS in self.be_proposed and
11678         constants.HV_CPU_MASK in self.hv_proposed):
11679       cpu_list = \
11680         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11681       # Verify mask is consistent with number of vCPUs. Can skip this
11682       # test if only 1 entry in the CPU mask, which means same mask
11683       # is applied to all vCPUs.
11684       if (len(cpu_list) > 1 and
11685           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11686         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11687                                    " CPU mask [%s]" %
11688                                    (self.be_proposed[constants.BE_VCPUS],
11689                                     self.hv_proposed[constants.HV_CPU_MASK]),
11690                                    errors.ECODE_INVAL)
11691
11692       # Only perform this test if a new CPU mask is given
11693       if constants.HV_CPU_MASK in self.hv_new:
11694         # Calculate the largest CPU number requested
11695         max_requested_cpu = max(map(max, cpu_list))
11696         # Check that all of the instance's nodes have enough physical CPUs to
11697         # satisfy the requested CPU mask
11698         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11699                                 max_requested_cpu + 1, instance.hypervisor)
11700
11701     # osparams processing
11702     if self.op.osparams:
11703       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11704       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11705       self.os_inst = i_osdict # the new dict (without defaults)
11706     else:
11707       self.os_inst = {}
11708
11709     self.warn = []
11710
11711     #TODO(dynmem): do the appropriate check involving MINMEM
11712     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11713         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11714       mem_check_list = [pnode]
11715       if be_new[constants.BE_AUTO_BALANCE]:
11716         # either we changed auto_balance to yes or it was from before
11717         mem_check_list.extend(instance.secondary_nodes)
11718       instance_info = self.rpc.call_instance_info(pnode, instance.name,
11719                                                   instance.hypervisor)
11720       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11721                                          [instance.hypervisor])
11722       pninfo = nodeinfo[pnode]
11723       msg = pninfo.fail_msg
11724       if msg:
11725         # Assume the primary node is unreachable and go ahead
11726         self.warn.append("Can't get info from primary node %s: %s" %
11727                          (pnode, msg))
11728       else:
11729         (_, _, (pnhvinfo, )) = pninfo.payload
11730         if not isinstance(pnhvinfo.get("memory_free", None), int):
11731           self.warn.append("Node data from primary node %s doesn't contain"
11732                            " free memory information" % pnode)
11733         elif instance_info.fail_msg:
11734           self.warn.append("Can't get instance runtime information: %s" %
11735                           instance_info.fail_msg)
11736         else:
11737           if instance_info.payload:
11738             current_mem = int(instance_info.payload["memory"])
11739           else:
11740             # Assume instance not running
11741             # (there is a slight race condition here, but it's not very
11742             # probable, and we have no other way to check)
11743             # TODO: Describe race condition
11744             current_mem = 0
11745           #TODO(dynmem): do the appropriate check involving MINMEM
11746           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11747                       pnhvinfo["memory_free"])
11748           if miss_mem > 0:
11749             raise errors.OpPrereqError("This change will prevent the instance"
11750                                        " from starting, due to %d MB of memory"
11751                                        " missing on its primary node" %
11752                                        miss_mem,
11753                                        errors.ECODE_NORES)
11754
11755       if be_new[constants.BE_AUTO_BALANCE]:
11756         for node, nres in nodeinfo.items():
11757           if node not in instance.secondary_nodes:
11758             continue
11759           nres.Raise("Can't get info from secondary node %s" % node,
11760                      prereq=True, ecode=errors.ECODE_STATE)
11761           (_, _, (nhvinfo, )) = nres.payload
11762           if not isinstance(nhvinfo.get("memory_free", None), int):
11763             raise errors.OpPrereqError("Secondary node %s didn't return free"
11764                                        " memory information" % node,
11765                                        errors.ECODE_STATE)
11766           #TODO(dynmem): do the appropriate check involving MINMEM
11767           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11768             raise errors.OpPrereqError("This change will prevent the instance"
11769                                        " from failover to its secondary node"
11770                                        " %s, due to not enough memory" % node,
11771                                        errors.ECODE_STATE)
11772
11773     # NIC processing
11774     self.nic_pnew = {}
11775     self.nic_pinst = {}
11776     for nic_op, nic_dict in self.op.nics:
11777       if nic_op == constants.DDM_REMOVE:
11778         if not instance.nics:
11779           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11780                                      errors.ECODE_INVAL)
11781         continue
11782       if nic_op != constants.DDM_ADD:
11783         # an existing nic
11784         if not instance.nics:
11785           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11786                                      " no NICs" % nic_op,
11787                                      errors.ECODE_INVAL)
11788         if nic_op < 0 or nic_op >= len(instance.nics):
11789           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11790                                      " are 0 to %d" %
11791                                      (nic_op, len(instance.nics) - 1),
11792                                      errors.ECODE_INVAL)
11793         old_nic_params = instance.nics[nic_op].nicparams
11794         old_nic_ip = instance.nics[nic_op].ip
11795       else:
11796         old_nic_params = {}
11797         old_nic_ip = None
11798
11799       update_params_dict = dict([(key, nic_dict[key])
11800                                  for key in constants.NICS_PARAMETERS
11801                                  if key in nic_dict])
11802
11803       if "bridge" in nic_dict:
11804         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11805
11806       new_nic_params = _GetUpdatedParams(old_nic_params,
11807                                          update_params_dict)
11808       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11809       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11810       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11811       self.nic_pinst[nic_op] = new_nic_params
11812       self.nic_pnew[nic_op] = new_filled_nic_params
11813       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11814
11815       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11816         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11817         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11818         if msg:
11819           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11820           if self.op.force:
11821             self.warn.append(msg)
11822           else:
11823             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11824       if new_nic_mode == constants.NIC_MODE_ROUTED:
11825         if constants.INIC_IP in nic_dict:
11826           nic_ip = nic_dict[constants.INIC_IP]
11827         else:
11828           nic_ip = old_nic_ip
11829         if nic_ip is None:
11830           raise errors.OpPrereqError("Cannot set the nic ip to None"
11831                                      " on a routed nic", errors.ECODE_INVAL)
11832       if constants.INIC_MAC in nic_dict:
11833         nic_mac = nic_dict[constants.INIC_MAC]
11834         if nic_mac is None:
11835           raise errors.OpPrereqError("Cannot set the nic mac to None",
11836                                      errors.ECODE_INVAL)
11837         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11838           # otherwise generate the mac
11839           nic_dict[constants.INIC_MAC] = \
11840             self.cfg.GenerateMAC(self.proc.GetECId())
11841         else:
11842           # or validate/reserve the current one
11843           try:
11844             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11845           except errors.ReservationError:
11846             raise errors.OpPrereqError("MAC address %s already in use"
11847                                        " in cluster" % nic_mac,
11848                                        errors.ECODE_NOTUNIQUE)
11849
11850     # DISK processing
11851     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11852       raise errors.OpPrereqError("Disk operations not supported for"
11853                                  " diskless instances",
11854                                  errors.ECODE_INVAL)
11855     for disk_op, _ in self.op.disks:
11856       if disk_op == constants.DDM_REMOVE:
11857         if len(instance.disks) == 1:
11858           raise errors.OpPrereqError("Cannot remove the last disk of"
11859                                      " an instance", errors.ECODE_INVAL)
11860         _CheckInstanceState(self, instance, INSTANCE_DOWN,
11861                             msg="cannot remove disks")
11862
11863       if (disk_op == constants.DDM_ADD and
11864           len(instance.disks) >= constants.MAX_DISKS):
11865         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11866                                    " add more" % constants.MAX_DISKS,
11867                                    errors.ECODE_STATE)
11868       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11869         # an existing disk
11870         if disk_op < 0 or disk_op >= len(instance.disks):
11871           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11872                                      " are 0 to %d" %
11873                                      (disk_op, len(instance.disks)),
11874                                      errors.ECODE_INVAL)
11875
11876     # disabling the instance
11877     if self.op.offline_inst:
11878       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11879                           msg="cannot change instance state to offline")
11880
11881     # enabling the instance
11882     if self.op.online_inst:
11883       _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11884                           msg="cannot make instance go online")
11885
11886   def _ConvertPlainToDrbd(self, feedback_fn):
11887     """Converts an instance from plain to drbd.
11888
11889     """
11890     feedback_fn("Converting template to drbd")
11891     instance = self.instance
11892     pnode = instance.primary_node
11893     snode = self.op.remote_node
11894
11895     assert instance.disk_template == constants.DT_PLAIN
11896
11897     # create a fake disk info for _GenerateDiskTemplate
11898     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11899                   constants.IDISK_VG: d.logical_id[0]}
11900                  for d in instance.disks]
11901     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11902                                       instance.name, pnode, [snode],
11903                                       disk_info, None, None, 0, feedback_fn,
11904                                       self.diskparams)
11905     info = _GetInstanceInfoText(instance)
11906     feedback_fn("Creating aditional volumes...")
11907     # first, create the missing data and meta devices
11908     for disk in new_disks:
11909       # unfortunately this is... not too nice
11910       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11911                             info, True)
11912       for child in disk.children:
11913         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11914     # at this stage, all new LVs have been created, we can rename the
11915     # old ones
11916     feedback_fn("Renaming original volumes...")
11917     rename_list = [(o, n.children[0].logical_id)
11918                    for (o, n) in zip(instance.disks, new_disks)]
11919     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11920     result.Raise("Failed to rename original LVs")
11921
11922     feedback_fn("Initializing DRBD devices...")
11923     # all child devices are in place, we can now create the DRBD devices
11924     for disk in new_disks:
11925       for node in [pnode, snode]:
11926         f_create = node == pnode
11927         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11928
11929     # at this point, the instance has been modified
11930     instance.disk_template = constants.DT_DRBD8
11931     instance.disks = new_disks
11932     self.cfg.Update(instance, feedback_fn)
11933
11934     # Release node locks while waiting for sync
11935     _ReleaseLocks(self, locking.LEVEL_NODE)
11936
11937     # disks are created, waiting for sync
11938     disk_abort = not _WaitForSync(self, instance,
11939                                   oneshot=not self.op.wait_for_sync)
11940     if disk_abort:
11941       raise errors.OpExecError("There are some degraded disks for"
11942                                " this instance, please cleanup manually")
11943
11944     # Node resource locks will be released by caller
11945
11946   def _ConvertDrbdToPlain(self, feedback_fn):
11947     """Converts an instance from drbd to plain.
11948
11949     """
11950     instance = self.instance
11951
11952     assert len(instance.secondary_nodes) == 1
11953     assert instance.disk_template == constants.DT_DRBD8
11954
11955     pnode = instance.primary_node
11956     snode = instance.secondary_nodes[0]
11957     feedback_fn("Converting template to plain")
11958
11959     old_disks = instance.disks
11960     new_disks = [d.children[0] for d in old_disks]
11961
11962     # copy over size and mode
11963     for parent, child in zip(old_disks, new_disks):
11964       child.size = parent.size
11965       child.mode = parent.mode
11966
11967     # update instance structure
11968     instance.disks = new_disks
11969     instance.disk_template = constants.DT_PLAIN
11970     self.cfg.Update(instance, feedback_fn)
11971
11972     # Release locks in case removing disks takes a while
11973     _ReleaseLocks(self, locking.LEVEL_NODE)
11974
11975     feedback_fn("Removing volumes on the secondary node...")
11976     for disk in old_disks:
11977       self.cfg.SetDiskID(disk, snode)
11978       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11979       if msg:
11980         self.LogWarning("Could not remove block device %s on node %s,"
11981                         " continuing anyway: %s", disk.iv_name, snode, msg)
11982
11983     feedback_fn("Removing unneeded volumes on the primary node...")
11984     for idx, disk in enumerate(old_disks):
11985       meta = disk.children[1]
11986       self.cfg.SetDiskID(meta, pnode)
11987       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11988       if msg:
11989         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11990                         " continuing anyway: %s", idx, pnode, msg)
11991
11992     # this is a DRBD disk, return its port to the pool
11993     for disk in old_disks:
11994       tcp_port = disk.logical_id[2]
11995       self.cfg.AddTcpUdpPort(tcp_port)
11996
11997     # Node resource locks will be released by caller
11998
11999   def Exec(self, feedback_fn):
12000     """Modifies an instance.
12001
12002     All parameters take effect only at the next restart of the instance.
12003
12004     """
12005     # Process here the warnings from CheckPrereq, as we don't have a
12006     # feedback_fn there.
12007     for warn in self.warn:
12008       feedback_fn("WARNING: %s" % warn)
12009
12010     assert ((self.op.disk_template is None) ^
12011             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12012       "Not owning any node resource locks"
12013
12014     result = []
12015     instance = self.instance
12016     # disk changes
12017     for disk_op, disk_dict in self.op.disks:
12018       if disk_op == constants.DDM_REMOVE:
12019         # remove the last disk
12020         device = instance.disks.pop()
12021         device_idx = len(instance.disks)
12022         for node, disk in device.ComputeNodeTree(instance.primary_node):
12023           self.cfg.SetDiskID(disk, node)
12024           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12025           if msg:
12026             self.LogWarning("Could not remove disk/%d on node %s: %s,"
12027                             " continuing anyway", device_idx, node, msg)
12028         result.append(("disk/%d" % device_idx, "remove"))
12029
12030         # if this is a DRBD disk, return its port to the pool
12031         if device.dev_type in constants.LDS_DRBD:
12032           tcp_port = device.logical_id[2]
12033           self.cfg.AddTcpUdpPort(tcp_port)
12034       elif disk_op == constants.DDM_ADD:
12035         # add a new disk
12036         if instance.disk_template in (constants.DT_FILE,
12037                                         constants.DT_SHARED_FILE):
12038           file_driver, file_path = instance.disks[0].logical_id
12039           file_path = os.path.dirname(file_path)
12040         else:
12041           file_driver = file_path = None
12042         disk_idx_base = len(instance.disks)
12043         new_disk = _GenerateDiskTemplate(self,
12044                                          instance.disk_template,
12045                                          instance.name, instance.primary_node,
12046                                          instance.secondary_nodes,
12047                                          [disk_dict],
12048                                          file_path,
12049                                          file_driver,
12050                                          disk_idx_base,
12051                                          feedback_fn,
12052                                          self.diskparams)[0]
12053         instance.disks.append(new_disk)
12054         info = _GetInstanceInfoText(instance)
12055
12056         logging.info("Creating volume %s for instance %s",
12057                      new_disk.iv_name, instance.name)
12058         # Note: this needs to be kept in sync with _CreateDisks
12059         #HARDCODE
12060         for node in instance.all_nodes:
12061           f_create = node == instance.primary_node
12062           try:
12063             _CreateBlockDev(self, node, instance, new_disk,
12064                             f_create, info, f_create)
12065           except errors.OpExecError, err:
12066             self.LogWarning("Failed to create volume %s (%s) on"
12067                             " node %s: %s",
12068                             new_disk.iv_name, new_disk, node, err)
12069         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12070                        (new_disk.size, new_disk.mode)))
12071       else:
12072         # change a given disk
12073         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12074         result.append(("disk.mode/%d" % disk_op,
12075                        disk_dict[constants.IDISK_MODE]))
12076
12077     if self.op.disk_template:
12078       if __debug__:
12079         check_nodes = set(instance.all_nodes)
12080         if self.op.remote_node:
12081           check_nodes.add(self.op.remote_node)
12082         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12083           owned = self.owned_locks(level)
12084           assert not (check_nodes - owned), \
12085             ("Not owning the correct locks, owning %r, expected at least %r" %
12086              (owned, check_nodes))
12087
12088       r_shut = _ShutdownInstanceDisks(self, instance)
12089       if not r_shut:
12090         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12091                                  " proceed with disk template conversion")
12092       mode = (instance.disk_template, self.op.disk_template)
12093       try:
12094         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12095       except:
12096         self.cfg.ReleaseDRBDMinors(instance.name)
12097         raise
12098       result.append(("disk_template", self.op.disk_template))
12099
12100       assert instance.disk_template == self.op.disk_template, \
12101         ("Expected disk template '%s', found '%s'" %
12102          (self.op.disk_template, instance.disk_template))
12103
12104     # Release node and resource locks if there are any (they might already have
12105     # been released during disk conversion)
12106     _ReleaseLocks(self, locking.LEVEL_NODE)
12107     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12108
12109     # NIC changes
12110     for nic_op, nic_dict in self.op.nics:
12111       if nic_op == constants.DDM_REMOVE:
12112         # remove the last nic
12113         del instance.nics[-1]
12114         result.append(("nic.%d" % len(instance.nics), "remove"))
12115       elif nic_op == constants.DDM_ADD:
12116         # mac and bridge should be set, by now
12117         mac = nic_dict[constants.INIC_MAC]
12118         ip = nic_dict.get(constants.INIC_IP, None)
12119         nicparams = self.nic_pinst[constants.DDM_ADD]
12120         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12121         instance.nics.append(new_nic)
12122         result.append(("nic.%d" % (len(instance.nics) - 1),
12123                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12124                        (new_nic.mac, new_nic.ip,
12125                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12126                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12127                        )))
12128       else:
12129         for key in (constants.INIC_MAC, constants.INIC_IP):
12130           if key in nic_dict:
12131             setattr(instance.nics[nic_op], key, nic_dict[key])
12132         if nic_op in self.nic_pinst:
12133           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12134         for key, val in nic_dict.iteritems():
12135           result.append(("nic.%s/%d" % (key, nic_op), val))
12136
12137     # hvparams changes
12138     if self.op.hvparams:
12139       instance.hvparams = self.hv_inst
12140       for key, val in self.op.hvparams.iteritems():
12141         result.append(("hv/%s" % key, val))
12142
12143     # beparams changes
12144     if self.op.beparams:
12145       instance.beparams = self.be_inst
12146       for key, val in self.op.beparams.iteritems():
12147         result.append(("be/%s" % key, val))
12148
12149     # OS change
12150     if self.op.os_name:
12151       instance.os = self.op.os_name
12152
12153     # osparams changes
12154     if self.op.osparams:
12155       instance.osparams = self.os_inst
12156       for key, val in self.op.osparams.iteritems():
12157         result.append(("os/%s" % key, val))
12158
12159     # online/offline instance
12160     if self.op.online_inst:
12161       self.cfg.MarkInstanceDown(instance.name)
12162       result.append(("admin_state", constants.ADMINST_DOWN))
12163     if self.op.offline_inst:
12164       self.cfg.MarkInstanceOffline(instance.name)
12165       result.append(("admin_state", constants.ADMINST_OFFLINE))
12166
12167     self.cfg.Update(instance, feedback_fn)
12168
12169     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12170                 self.owned_locks(locking.LEVEL_NODE)), \
12171       "All node locks should have been released by now"
12172
12173     return result
12174
12175   _DISK_CONVERSIONS = {
12176     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12177     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12178     }
12179
12180
12181 class LUInstanceChangeGroup(LogicalUnit):
12182   HPATH = "instance-change-group"
12183   HTYPE = constants.HTYPE_INSTANCE
12184   REQ_BGL = False
12185
12186   def ExpandNames(self):
12187     self.share_locks = _ShareAll()
12188     self.needed_locks = {
12189       locking.LEVEL_NODEGROUP: [],
12190       locking.LEVEL_NODE: [],
12191       }
12192
12193     self._ExpandAndLockInstance()
12194
12195     if self.op.target_groups:
12196       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12197                                   self.op.target_groups)
12198     else:
12199       self.req_target_uuids = None
12200
12201     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12202
12203   def DeclareLocks(self, level):
12204     if level == locking.LEVEL_NODEGROUP:
12205       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12206
12207       if self.req_target_uuids:
12208         lock_groups = set(self.req_target_uuids)
12209
12210         # Lock all groups used by instance optimistically; this requires going
12211         # via the node before it's locked, requiring verification later on
12212         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12213         lock_groups.update(instance_groups)
12214       else:
12215         # No target groups, need to lock all of them
12216         lock_groups = locking.ALL_SET
12217
12218       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12219
12220     elif level == locking.LEVEL_NODE:
12221       if self.req_target_uuids:
12222         # Lock all nodes used by instances
12223         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12224         self._LockInstancesNodes()
12225
12226         # Lock all nodes in all potential target groups
12227         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12228                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12229         member_nodes = [node_name
12230                         for group in lock_groups
12231                         for node_name in self.cfg.GetNodeGroup(group).members]
12232         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12233       else:
12234         # Lock all nodes as all groups are potential targets
12235         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12236
12237   def CheckPrereq(self):
12238     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12239     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12240     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12241
12242     assert (self.req_target_uuids is None or
12243             owned_groups.issuperset(self.req_target_uuids))
12244     assert owned_instances == set([self.op.instance_name])
12245
12246     # Get instance information
12247     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12248
12249     # Check if node groups for locked instance are still correct
12250     assert owned_nodes.issuperset(self.instance.all_nodes), \
12251       ("Instance %s's nodes changed while we kept the lock" %
12252        self.op.instance_name)
12253
12254     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12255                                            owned_groups)
12256
12257     if self.req_target_uuids:
12258       # User requested specific target groups
12259       self.target_uuids = self.req_target_uuids
12260     else:
12261       # All groups except those used by the instance are potential targets
12262       self.target_uuids = owned_groups - inst_groups
12263
12264     conflicting_groups = self.target_uuids & inst_groups
12265     if conflicting_groups:
12266       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12267                                  " used by the instance '%s'" %
12268                                  (utils.CommaJoin(conflicting_groups),
12269                                   self.op.instance_name),
12270                                  errors.ECODE_INVAL)
12271
12272     if not self.target_uuids:
12273       raise errors.OpPrereqError("There are no possible target groups",
12274                                  errors.ECODE_INVAL)
12275
12276   def BuildHooksEnv(self):
12277     """Build hooks env.
12278
12279     """
12280     assert self.target_uuids
12281
12282     env = {
12283       "TARGET_GROUPS": " ".join(self.target_uuids),
12284       }
12285
12286     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12287
12288     return env
12289
12290   def BuildHooksNodes(self):
12291     """Build hooks nodes.
12292
12293     """
12294     mn = self.cfg.GetMasterNode()
12295     return ([mn], [mn])
12296
12297   def Exec(self, feedback_fn):
12298     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12299
12300     assert instances == [self.op.instance_name], "Instance not locked"
12301
12302     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12303                      instances=instances, target_groups=list(self.target_uuids))
12304
12305     ial.Run(self.op.iallocator)
12306
12307     if not ial.success:
12308       raise errors.OpPrereqError("Can't compute solution for changing group of"
12309                                  " instance '%s' using iallocator '%s': %s" %
12310                                  (self.op.instance_name, self.op.iallocator,
12311                                   ial.info),
12312                                  errors.ECODE_NORES)
12313
12314     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12315
12316     self.LogInfo("Iallocator returned %s job(s) for changing group of"
12317                  " instance '%s'", len(jobs), self.op.instance_name)
12318
12319     return ResultWithJobs(jobs)
12320
12321
12322 class LUBackupQuery(NoHooksLU):
12323   """Query the exports list
12324
12325   """
12326   REQ_BGL = False
12327
12328   def ExpandNames(self):
12329     self.needed_locks = {}
12330     self.share_locks[locking.LEVEL_NODE] = 1
12331     if not self.op.nodes:
12332       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12333     else:
12334       self.needed_locks[locking.LEVEL_NODE] = \
12335         _GetWantedNodes(self, self.op.nodes)
12336
12337   def Exec(self, feedback_fn):
12338     """Compute the list of all the exported system images.
12339
12340     @rtype: dict
12341     @return: a dictionary with the structure node->(export-list)
12342         where export-list is a list of the instances exported on
12343         that node.
12344
12345     """
12346     self.nodes = self.owned_locks(locking.LEVEL_NODE)
12347     rpcresult = self.rpc.call_export_list(self.nodes)
12348     result = {}
12349     for node in rpcresult:
12350       if rpcresult[node].fail_msg:
12351         result[node] = False
12352       else:
12353         result[node] = rpcresult[node].payload
12354
12355     return result
12356
12357
12358 class LUBackupPrepare(NoHooksLU):
12359   """Prepares an instance for an export and returns useful information.
12360
12361   """
12362   REQ_BGL = False
12363
12364   def ExpandNames(self):
12365     self._ExpandAndLockInstance()
12366
12367   def CheckPrereq(self):
12368     """Check prerequisites.
12369
12370     """
12371     instance_name = self.op.instance_name
12372
12373     self.instance = self.cfg.GetInstanceInfo(instance_name)
12374     assert self.instance is not None, \
12375           "Cannot retrieve locked instance %s" % self.op.instance_name
12376     _CheckNodeOnline(self, self.instance.primary_node)
12377
12378     self._cds = _GetClusterDomainSecret()
12379
12380   def Exec(self, feedback_fn):
12381     """Prepares an instance for an export.
12382
12383     """
12384     instance = self.instance
12385
12386     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12387       salt = utils.GenerateSecret(8)
12388
12389       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12390       result = self.rpc.call_x509_cert_create(instance.primary_node,
12391                                               constants.RIE_CERT_VALIDITY)
12392       result.Raise("Can't create X509 key and certificate on %s" % result.node)
12393
12394       (name, cert_pem) = result.payload
12395
12396       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12397                                              cert_pem)
12398
12399       return {
12400         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12401         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12402                           salt),
12403         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12404         }
12405
12406     return None
12407
12408
12409 class LUBackupExport(LogicalUnit):
12410   """Export an instance to an image in the cluster.
12411
12412   """
12413   HPATH = "instance-export"
12414   HTYPE = constants.HTYPE_INSTANCE
12415   REQ_BGL = False
12416
12417   def CheckArguments(self):
12418     """Check the arguments.
12419
12420     """
12421     self.x509_key_name = self.op.x509_key_name
12422     self.dest_x509_ca_pem = self.op.destination_x509_ca
12423
12424     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12425       if not self.x509_key_name:
12426         raise errors.OpPrereqError("Missing X509 key name for encryption",
12427                                    errors.ECODE_INVAL)
12428
12429       if not self.dest_x509_ca_pem:
12430         raise errors.OpPrereqError("Missing destination X509 CA",
12431                                    errors.ECODE_INVAL)
12432
12433   def ExpandNames(self):
12434     self._ExpandAndLockInstance()
12435
12436     # Lock all nodes for local exports
12437     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12438       # FIXME: lock only instance primary and destination node
12439       #
12440       # Sad but true, for now we have do lock all nodes, as we don't know where
12441       # the previous export might be, and in this LU we search for it and
12442       # remove it from its current node. In the future we could fix this by:
12443       #  - making a tasklet to search (share-lock all), then create the
12444       #    new one, then one to remove, after
12445       #  - removing the removal operation altogether
12446       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12447
12448   def DeclareLocks(self, level):
12449     """Last minute lock declaration."""
12450     # All nodes are locked anyway, so nothing to do here.
12451
12452   def BuildHooksEnv(self):
12453     """Build hooks env.
12454
12455     This will run on the master, primary node and target node.
12456
12457     """
12458     env = {
12459       "EXPORT_MODE": self.op.mode,
12460       "EXPORT_NODE": self.op.target_node,
12461       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12462       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12463       # TODO: Generic function for boolean env variables
12464       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12465       }
12466
12467     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12468
12469     return env
12470
12471   def BuildHooksNodes(self):
12472     """Build hooks nodes.
12473
12474     """
12475     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12476
12477     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12478       nl.append(self.op.target_node)
12479
12480     return (nl, nl)
12481
12482   def CheckPrereq(self):
12483     """Check prerequisites.
12484
12485     This checks that the instance and node names are valid.
12486
12487     """
12488     instance_name = self.op.instance_name
12489
12490     self.instance = self.cfg.GetInstanceInfo(instance_name)
12491     assert self.instance is not None, \
12492           "Cannot retrieve locked instance %s" % self.op.instance_name
12493     _CheckNodeOnline(self, self.instance.primary_node)
12494
12495     if (self.op.remove_instance and
12496         self.instance.admin_state == constants.ADMINST_UP and
12497         not self.op.shutdown):
12498       raise errors.OpPrereqError("Can not remove instance without shutting it"
12499                                  " down before")
12500
12501     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12502       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12503       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12504       assert self.dst_node is not None
12505
12506       _CheckNodeOnline(self, self.dst_node.name)
12507       _CheckNodeNotDrained(self, self.dst_node.name)
12508
12509       self._cds = None
12510       self.dest_disk_info = None
12511       self.dest_x509_ca = None
12512
12513     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12514       self.dst_node = None
12515
12516       if len(self.op.target_node) != len(self.instance.disks):
12517         raise errors.OpPrereqError(("Received destination information for %s"
12518                                     " disks, but instance %s has %s disks") %
12519                                    (len(self.op.target_node), instance_name,
12520                                     len(self.instance.disks)),
12521                                    errors.ECODE_INVAL)
12522
12523       cds = _GetClusterDomainSecret()
12524
12525       # Check X509 key name
12526       try:
12527         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12528       except (TypeError, ValueError), err:
12529         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12530
12531       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12532         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12533                                    errors.ECODE_INVAL)
12534
12535       # Load and verify CA
12536       try:
12537         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12538       except OpenSSL.crypto.Error, err:
12539         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12540                                    (err, ), errors.ECODE_INVAL)
12541
12542       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12543       if errcode is not None:
12544         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12545                                    (msg, ), errors.ECODE_INVAL)
12546
12547       self.dest_x509_ca = cert
12548
12549       # Verify target information
12550       disk_info = []
12551       for idx, disk_data in enumerate(self.op.target_node):
12552         try:
12553           (host, port, magic) = \
12554             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12555         except errors.GenericError, err:
12556           raise errors.OpPrereqError("Target info for disk %s: %s" %
12557                                      (idx, err), errors.ECODE_INVAL)
12558
12559         disk_info.append((host, port, magic))
12560
12561       assert len(disk_info) == len(self.op.target_node)
12562       self.dest_disk_info = disk_info
12563
12564     else:
12565       raise errors.ProgrammerError("Unhandled export mode %r" %
12566                                    self.op.mode)
12567
12568     # instance disk type verification
12569     # TODO: Implement export support for file-based disks
12570     for disk in self.instance.disks:
12571       if disk.dev_type == constants.LD_FILE:
12572         raise errors.OpPrereqError("Export not supported for instances with"
12573                                    " file-based disks", errors.ECODE_INVAL)
12574
12575   def _CleanupExports(self, feedback_fn):
12576     """Removes exports of current instance from all other nodes.
12577
12578     If an instance in a cluster with nodes A..D was exported to node C, its
12579     exports will be removed from the nodes A, B and D.
12580
12581     """
12582     assert self.op.mode != constants.EXPORT_MODE_REMOTE
12583
12584     nodelist = self.cfg.GetNodeList()
12585     nodelist.remove(self.dst_node.name)
12586
12587     # on one-node clusters nodelist will be empty after the removal
12588     # if we proceed the backup would be removed because OpBackupQuery
12589     # substitutes an empty list with the full cluster node list.
12590     iname = self.instance.name
12591     if nodelist:
12592       feedback_fn("Removing old exports for instance %s" % iname)
12593       exportlist = self.rpc.call_export_list(nodelist)
12594       for node in exportlist:
12595         if exportlist[node].fail_msg:
12596           continue
12597         if iname in exportlist[node].payload:
12598           msg = self.rpc.call_export_remove(node, iname).fail_msg
12599           if msg:
12600             self.LogWarning("Could not remove older export for instance %s"
12601                             " on node %s: %s", iname, node, msg)
12602
12603   def Exec(self, feedback_fn):
12604     """Export an instance to an image in the cluster.
12605
12606     """
12607     assert self.op.mode in constants.EXPORT_MODES
12608
12609     instance = self.instance
12610     src_node = instance.primary_node
12611
12612     if self.op.shutdown:
12613       # shutdown the instance, but not the disks
12614       feedback_fn("Shutting down instance %s" % instance.name)
12615       result = self.rpc.call_instance_shutdown(src_node, instance,
12616                                                self.op.shutdown_timeout)
12617       # TODO: Maybe ignore failures if ignore_remove_failures is set
12618       result.Raise("Could not shutdown instance %s on"
12619                    " node %s" % (instance.name, src_node))
12620
12621     # set the disks ID correctly since call_instance_start needs the
12622     # correct drbd minor to create the symlinks
12623     for disk in instance.disks:
12624       self.cfg.SetDiskID(disk, src_node)
12625
12626     activate_disks = (instance.admin_state != constants.ADMINST_UP)
12627
12628     if activate_disks:
12629       # Activate the instance disks if we'exporting a stopped instance
12630       feedback_fn("Activating disks for %s" % instance.name)
12631       _StartInstanceDisks(self, instance, None)
12632
12633     try:
12634       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12635                                                      instance)
12636
12637       helper.CreateSnapshots()
12638       try:
12639         if (self.op.shutdown and
12640             instance.admin_state == constants.ADMINST_UP and
12641             not self.op.remove_instance):
12642           assert not activate_disks
12643           feedback_fn("Starting instance %s" % instance.name)
12644           result = self.rpc.call_instance_start(src_node,
12645                                                 (instance, None, None), False)
12646           msg = result.fail_msg
12647           if msg:
12648             feedback_fn("Failed to start instance: %s" % msg)
12649             _ShutdownInstanceDisks(self, instance)
12650             raise errors.OpExecError("Could not start instance: %s" % msg)
12651
12652         if self.op.mode == constants.EXPORT_MODE_LOCAL:
12653           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12654         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12655           connect_timeout = constants.RIE_CONNECT_TIMEOUT
12656           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12657
12658           (key_name, _, _) = self.x509_key_name
12659
12660           dest_ca_pem = \
12661             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12662                                             self.dest_x509_ca)
12663
12664           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12665                                                      key_name, dest_ca_pem,
12666                                                      timeouts)
12667       finally:
12668         helper.Cleanup()
12669
12670       # Check for backwards compatibility
12671       assert len(dresults) == len(instance.disks)
12672       assert compat.all(isinstance(i, bool) for i in dresults), \
12673              "Not all results are boolean: %r" % dresults
12674
12675     finally:
12676       if activate_disks:
12677         feedback_fn("Deactivating disks for %s" % instance.name)
12678         _ShutdownInstanceDisks(self, instance)
12679
12680     if not (compat.all(dresults) and fin_resu):
12681       failures = []
12682       if not fin_resu:
12683         failures.append("export finalization")
12684       if not compat.all(dresults):
12685         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12686                                if not dsk)
12687         failures.append("disk export: disk(s) %s" % fdsk)
12688
12689       raise errors.OpExecError("Export failed, errors in %s" %
12690                                utils.CommaJoin(failures))
12691
12692     # At this point, the export was successful, we can cleanup/finish
12693
12694     # Remove instance if requested
12695     if self.op.remove_instance:
12696       feedback_fn("Removing instance %s" % instance.name)
12697       _RemoveInstance(self, feedback_fn, instance,
12698                       self.op.ignore_remove_failures)
12699
12700     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12701       self._CleanupExports(feedback_fn)
12702
12703     return fin_resu, dresults
12704
12705
12706 class LUBackupRemove(NoHooksLU):
12707   """Remove exports related to the named instance.
12708
12709   """
12710   REQ_BGL = False
12711
12712   def ExpandNames(self):
12713     self.needed_locks = {}
12714     # We need all nodes to be locked in order for RemoveExport to work, but we
12715     # don't need to lock the instance itself, as nothing will happen to it (and
12716     # we can remove exports also for a removed instance)
12717     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12718
12719   def Exec(self, feedback_fn):
12720     """Remove any export.
12721
12722     """
12723     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12724     # If the instance was not found we'll try with the name that was passed in.
12725     # This will only work if it was an FQDN, though.
12726     fqdn_warn = False
12727     if not instance_name:
12728       fqdn_warn = True
12729       instance_name = self.op.instance_name
12730
12731     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12732     exportlist = self.rpc.call_export_list(locked_nodes)
12733     found = False
12734     for node in exportlist:
12735       msg = exportlist[node].fail_msg
12736       if msg:
12737         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12738         continue
12739       if instance_name in exportlist[node].payload:
12740         found = True
12741         result = self.rpc.call_export_remove(node, instance_name)
12742         msg = result.fail_msg
12743         if msg:
12744           logging.error("Could not remove export for instance %s"
12745                         " on node %s: %s", instance_name, node, msg)
12746
12747     if fqdn_warn and not found:
12748       feedback_fn("Export not found. If trying to remove an export belonging"
12749                   " to a deleted instance please use its Fully Qualified"
12750                   " Domain Name.")
12751
12752
12753 class LUGroupAdd(LogicalUnit):
12754   """Logical unit for creating node groups.
12755
12756   """
12757   HPATH = "group-add"
12758   HTYPE = constants.HTYPE_GROUP
12759   REQ_BGL = False
12760
12761   def ExpandNames(self):
12762     # We need the new group's UUID here so that we can create and acquire the
12763     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12764     # that it should not check whether the UUID exists in the configuration.
12765     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12766     self.needed_locks = {}
12767     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12768
12769   def CheckPrereq(self):
12770     """Check prerequisites.
12771
12772     This checks that the given group name is not an existing node group
12773     already.
12774
12775     """
12776     try:
12777       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12778     except errors.OpPrereqError:
12779       pass
12780     else:
12781       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12782                                  " node group (UUID: %s)" %
12783                                  (self.op.group_name, existing_uuid),
12784                                  errors.ECODE_EXISTS)
12785
12786     if self.op.ndparams:
12787       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12788
12789     if self.op.hv_state:
12790       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12791     else:
12792       self.new_hv_state = None
12793
12794     if self.op.disk_state:
12795       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
12796     else:
12797       self.new_disk_state = None
12798
12799     if self.op.diskparams:
12800       for templ in constants.DISK_TEMPLATES:
12801         if templ not in self.op.diskparams:
12802           self.op.diskparams[templ] = {}
12803         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12804     else:
12805       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12806
12807     if self.op.ipolicy:
12808       cluster = self.cfg.GetClusterInfo()
12809       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12810       objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
12811
12812   def BuildHooksEnv(self):
12813     """Build hooks env.
12814
12815     """
12816     return {
12817       "GROUP_NAME": self.op.group_name,
12818       }
12819
12820   def BuildHooksNodes(self):
12821     """Build hooks nodes.
12822
12823     """
12824     mn = self.cfg.GetMasterNode()
12825     return ([mn], [mn])
12826
12827   def Exec(self, feedback_fn):
12828     """Add the node group to the cluster.
12829
12830     """
12831     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12832                                   uuid=self.group_uuid,
12833                                   alloc_policy=self.op.alloc_policy,
12834                                   ndparams=self.op.ndparams,
12835                                   diskparams=self.op.diskparams,
12836                                   ipolicy=self.op.ipolicy,
12837                                   hv_state_static=self.new_hv_state,
12838                                   disk_state_static=self.new_disk_state)
12839
12840     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12841     del self.remove_locks[locking.LEVEL_NODEGROUP]
12842
12843
12844 class LUGroupAssignNodes(NoHooksLU):
12845   """Logical unit for assigning nodes to groups.
12846
12847   """
12848   REQ_BGL = False
12849
12850   def ExpandNames(self):
12851     # These raise errors.OpPrereqError on their own:
12852     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12853     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12854
12855     # We want to lock all the affected nodes and groups. We have readily
12856     # available the list of nodes, and the *destination* group. To gather the
12857     # list of "source" groups, we need to fetch node information later on.
12858     self.needed_locks = {
12859       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12860       locking.LEVEL_NODE: self.op.nodes,
12861       }
12862
12863   def DeclareLocks(self, level):
12864     if level == locking.LEVEL_NODEGROUP:
12865       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12866
12867       # Try to get all affected nodes' groups without having the group or node
12868       # lock yet. Needs verification later in the code flow.
12869       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12870
12871       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12872
12873   def CheckPrereq(self):
12874     """Check prerequisites.
12875
12876     """
12877     assert self.needed_locks[locking.LEVEL_NODEGROUP]
12878     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12879             frozenset(self.op.nodes))
12880
12881     expected_locks = (set([self.group_uuid]) |
12882                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12883     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12884     if actual_locks != expected_locks:
12885       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12886                                " current groups are '%s', used to be '%s'" %
12887                                (utils.CommaJoin(expected_locks),
12888                                 utils.CommaJoin(actual_locks)))
12889
12890     self.node_data = self.cfg.GetAllNodesInfo()
12891     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12892     instance_data = self.cfg.GetAllInstancesInfo()
12893
12894     if self.group is None:
12895       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12896                                (self.op.group_name, self.group_uuid))
12897
12898     (new_splits, previous_splits) = \
12899       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12900                                              for node in self.op.nodes],
12901                                             self.node_data, instance_data)
12902
12903     if new_splits:
12904       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12905
12906       if not self.op.force:
12907         raise errors.OpExecError("The following instances get split by this"
12908                                  " change and --force was not given: %s" %
12909                                  fmt_new_splits)
12910       else:
12911         self.LogWarning("This operation will split the following instances: %s",
12912                         fmt_new_splits)
12913
12914         if previous_splits:
12915           self.LogWarning("In addition, these already-split instances continue"
12916                           " to be split across groups: %s",
12917                           utils.CommaJoin(utils.NiceSort(previous_splits)))
12918
12919   def Exec(self, feedback_fn):
12920     """Assign nodes to a new group.
12921
12922     """
12923     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12924
12925     self.cfg.AssignGroupNodes(mods)
12926
12927   @staticmethod
12928   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12929     """Check for split instances after a node assignment.
12930
12931     This method considers a series of node assignments as an atomic operation,
12932     and returns information about split instances after applying the set of
12933     changes.
12934
12935     In particular, it returns information about newly split instances, and
12936     instances that were already split, and remain so after the change.
12937
12938     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12939     considered.
12940
12941     @type changes: list of (node_name, new_group_uuid) pairs.
12942     @param changes: list of node assignments to consider.
12943     @param node_data: a dict with data for all nodes
12944     @param instance_data: a dict with all instances to consider
12945     @rtype: a two-tuple
12946     @return: a list of instances that were previously okay and result split as a
12947       consequence of this change, and a list of instances that were previously
12948       split and this change does not fix.
12949
12950     """
12951     changed_nodes = dict((node, group) for node, group in changes
12952                          if node_data[node].group != group)
12953
12954     all_split_instances = set()
12955     previously_split_instances = set()
12956
12957     def InstanceNodes(instance):
12958       return [instance.primary_node] + list(instance.secondary_nodes)
12959
12960     for inst in instance_data.values():
12961       if inst.disk_template not in constants.DTS_INT_MIRROR:
12962         continue
12963
12964       instance_nodes = InstanceNodes(inst)
12965
12966       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12967         previously_split_instances.add(inst.name)
12968
12969       if len(set(changed_nodes.get(node, node_data[node].group)
12970                  for node in instance_nodes)) > 1:
12971         all_split_instances.add(inst.name)
12972
12973     return (list(all_split_instances - previously_split_instances),
12974             list(previously_split_instances & all_split_instances))
12975
12976
12977 class _GroupQuery(_QueryBase):
12978   FIELDS = query.GROUP_FIELDS
12979
12980   def ExpandNames(self, lu):
12981     lu.needed_locks = {}
12982
12983     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12984     self._cluster = lu.cfg.GetClusterInfo()
12985     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12986
12987     if not self.names:
12988       self.wanted = [name_to_uuid[name]
12989                      for name in utils.NiceSort(name_to_uuid.keys())]
12990     else:
12991       # Accept names to be either names or UUIDs.
12992       missing = []
12993       self.wanted = []
12994       all_uuid = frozenset(self._all_groups.keys())
12995
12996       for name in self.names:
12997         if name in all_uuid:
12998           self.wanted.append(name)
12999         elif name in name_to_uuid:
13000           self.wanted.append(name_to_uuid[name])
13001         else:
13002           missing.append(name)
13003
13004       if missing:
13005         raise errors.OpPrereqError("Some groups do not exist: %s" %
13006                                    utils.CommaJoin(missing),
13007                                    errors.ECODE_NOENT)
13008
13009   def DeclareLocks(self, lu, level):
13010     pass
13011
13012   def _GetQueryData(self, lu):
13013     """Computes the list of node groups and their attributes.
13014
13015     """
13016     do_nodes = query.GQ_NODE in self.requested_data
13017     do_instances = query.GQ_INST in self.requested_data
13018
13019     group_to_nodes = None
13020     group_to_instances = None
13021
13022     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13023     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13024     # latter GetAllInstancesInfo() is not enough, for we have to go through
13025     # instance->node. Hence, we will need to process nodes even if we only need
13026     # instance information.
13027     if do_nodes or do_instances:
13028       all_nodes = lu.cfg.GetAllNodesInfo()
13029       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13030       node_to_group = {}
13031
13032       for node in all_nodes.values():
13033         if node.group in group_to_nodes:
13034           group_to_nodes[node.group].append(node.name)
13035           node_to_group[node.name] = node.group
13036
13037       if do_instances:
13038         all_instances = lu.cfg.GetAllInstancesInfo()
13039         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13040
13041         for instance in all_instances.values():
13042           node = instance.primary_node
13043           if node in node_to_group:
13044             group_to_instances[node_to_group[node]].append(instance.name)
13045
13046         if not do_nodes:
13047           # Do not pass on node information if it was not requested.
13048           group_to_nodes = None
13049
13050     return query.GroupQueryData(self._cluster,
13051                                 [self._all_groups[uuid]
13052                                  for uuid in self.wanted],
13053                                 group_to_nodes, group_to_instances)
13054
13055
13056 class LUGroupQuery(NoHooksLU):
13057   """Logical unit for querying node groups.
13058
13059   """
13060   REQ_BGL = False
13061
13062   def CheckArguments(self):
13063     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13064                           self.op.output_fields, False)
13065
13066   def ExpandNames(self):
13067     self.gq.ExpandNames(self)
13068
13069   def DeclareLocks(self, level):
13070     self.gq.DeclareLocks(self, level)
13071
13072   def Exec(self, feedback_fn):
13073     return self.gq.OldStyleQuery(self)
13074
13075
13076 class LUGroupSetParams(LogicalUnit):
13077   """Modifies the parameters of a node group.
13078
13079   """
13080   HPATH = "group-modify"
13081   HTYPE = constants.HTYPE_GROUP
13082   REQ_BGL = False
13083
13084   def CheckArguments(self):
13085     all_changes = [
13086       self.op.ndparams,
13087       self.op.diskparams,
13088       self.op.alloc_policy,
13089       self.op.hv_state,
13090       self.op.disk_state,
13091       self.op.ipolicy,
13092       ]
13093
13094     if all_changes.count(None) == len(all_changes):
13095       raise errors.OpPrereqError("Please pass at least one modification",
13096                                  errors.ECODE_INVAL)
13097
13098   def ExpandNames(self):
13099     # This raises errors.OpPrereqError on its own:
13100     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13101
13102     self.needed_locks = {
13103       locking.LEVEL_NODEGROUP: [self.group_uuid],
13104       }
13105
13106   def CheckPrereq(self):
13107     """Check prerequisites.
13108
13109     """
13110     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13111
13112     if self.group is None:
13113       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13114                                (self.op.group_name, self.group_uuid))
13115
13116     if self.op.ndparams:
13117       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13118       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13119       self.new_ndparams = new_ndparams
13120
13121     if self.op.diskparams:
13122       self.new_diskparams = dict()
13123       for templ in constants.DISK_TEMPLATES:
13124         if templ not in self.op.diskparams:
13125           self.op.diskparams[templ] = {}
13126         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13127                                              self.op.diskparams[templ])
13128         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13129         self.new_diskparams[templ] = new_templ_params
13130
13131     if self.op.hv_state:
13132       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13133                                                  self.group.hv_state_static)
13134
13135     if self.op.disk_state:
13136       self.new_disk_state = \
13137         _MergeAndVerifyDiskState(self.op.disk_state,
13138                                  self.group.disk_state_static)
13139
13140     if self.op.ipolicy:
13141       g_ipolicy = {}
13142       for key, value in self.op.ipolicy.iteritems():
13143         g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
13144                                            value,
13145                                            use_none=True)
13146         utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
13147       self.new_ipolicy = g_ipolicy
13148       objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
13149
13150   def BuildHooksEnv(self):
13151     """Build hooks env.
13152
13153     """
13154     return {
13155       "GROUP_NAME": self.op.group_name,
13156       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13157       }
13158
13159   def BuildHooksNodes(self):
13160     """Build hooks nodes.
13161
13162     """
13163     mn = self.cfg.GetMasterNode()
13164     return ([mn], [mn])
13165
13166   def Exec(self, feedback_fn):
13167     """Modifies the node group.
13168
13169     """
13170     result = []
13171
13172     if self.op.ndparams:
13173       self.group.ndparams = self.new_ndparams
13174       result.append(("ndparams", str(self.group.ndparams)))
13175
13176     if self.op.diskparams:
13177       self.group.diskparams = self.new_diskparams
13178       result.append(("diskparams", str(self.group.diskparams)))
13179
13180     if self.op.alloc_policy:
13181       self.group.alloc_policy = self.op.alloc_policy
13182
13183     if self.op.hv_state:
13184       self.group.hv_state_static = self.new_hv_state
13185
13186     if self.op.disk_state:
13187       self.group.disk_state_static = self.new_disk_state
13188
13189     if self.op.ipolicy:
13190       self.group.ipolicy = self.new_ipolicy
13191
13192     self.cfg.Update(self.group, feedback_fn)
13193     return result
13194
13195
13196 class LUGroupRemove(LogicalUnit):
13197   HPATH = "group-remove"
13198   HTYPE = constants.HTYPE_GROUP
13199   REQ_BGL = False
13200
13201   def ExpandNames(self):
13202     # This will raises errors.OpPrereqError on its own:
13203     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13204     self.needed_locks = {
13205       locking.LEVEL_NODEGROUP: [self.group_uuid],
13206       }
13207
13208   def CheckPrereq(self):
13209     """Check prerequisites.
13210
13211     This checks that the given group name exists as a node group, that is
13212     empty (i.e., contains no nodes), and that is not the last group of the
13213     cluster.
13214
13215     """
13216     # Verify that the group is empty.
13217     group_nodes = [node.name
13218                    for node in self.cfg.GetAllNodesInfo().values()
13219                    if node.group == self.group_uuid]
13220
13221     if group_nodes:
13222       raise errors.OpPrereqError("Group '%s' not empty, has the following"
13223                                  " nodes: %s" %
13224                                  (self.op.group_name,
13225                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
13226                                  errors.ECODE_STATE)
13227
13228     # Verify the cluster would not be left group-less.
13229     if len(self.cfg.GetNodeGroupList()) == 1:
13230       raise errors.OpPrereqError("Group '%s' is the only group,"
13231                                  " cannot be removed" %
13232                                  self.op.group_name,
13233                                  errors.ECODE_STATE)
13234
13235   def BuildHooksEnv(self):
13236     """Build hooks env.
13237
13238     """
13239     return {
13240       "GROUP_NAME": self.op.group_name,
13241       }
13242
13243   def BuildHooksNodes(self):
13244     """Build hooks nodes.
13245
13246     """
13247     mn = self.cfg.GetMasterNode()
13248     return ([mn], [mn])
13249
13250   def Exec(self, feedback_fn):
13251     """Remove the node group.
13252
13253     """
13254     try:
13255       self.cfg.RemoveNodeGroup(self.group_uuid)
13256     except errors.ConfigurationError:
13257       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13258                                (self.op.group_name, self.group_uuid))
13259
13260     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13261
13262
13263 class LUGroupRename(LogicalUnit):
13264   HPATH = "group-rename"
13265   HTYPE = constants.HTYPE_GROUP
13266   REQ_BGL = False
13267
13268   def ExpandNames(self):
13269     # This raises errors.OpPrereqError on its own:
13270     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13271
13272     self.needed_locks = {
13273       locking.LEVEL_NODEGROUP: [self.group_uuid],
13274       }
13275
13276   def CheckPrereq(self):
13277     """Check prerequisites.
13278
13279     Ensures requested new name is not yet used.
13280
13281     """
13282     try:
13283       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13284     except errors.OpPrereqError:
13285       pass
13286     else:
13287       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13288                                  " node group (UUID: %s)" %
13289                                  (self.op.new_name, new_name_uuid),
13290                                  errors.ECODE_EXISTS)
13291
13292   def BuildHooksEnv(self):
13293     """Build hooks env.
13294
13295     """
13296     return {
13297       "OLD_NAME": self.op.group_name,
13298       "NEW_NAME": self.op.new_name,
13299       }
13300
13301   def BuildHooksNodes(self):
13302     """Build hooks nodes.
13303
13304     """
13305     mn = self.cfg.GetMasterNode()
13306
13307     all_nodes = self.cfg.GetAllNodesInfo()
13308     all_nodes.pop(mn, None)
13309
13310     run_nodes = [mn]
13311     run_nodes.extend(node.name for node in all_nodes.values()
13312                      if node.group == self.group_uuid)
13313
13314     return (run_nodes, run_nodes)
13315
13316   def Exec(self, feedback_fn):
13317     """Rename the node group.
13318
13319     """
13320     group = self.cfg.GetNodeGroup(self.group_uuid)
13321
13322     if group is None:
13323       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13324                                (self.op.group_name, self.group_uuid))
13325
13326     group.name = self.op.new_name
13327     self.cfg.Update(group, feedback_fn)
13328
13329     return self.op.new_name
13330
13331
13332 class LUGroupEvacuate(LogicalUnit):
13333   HPATH = "group-evacuate"
13334   HTYPE = constants.HTYPE_GROUP
13335   REQ_BGL = False
13336
13337   def ExpandNames(self):
13338     # This raises errors.OpPrereqError on its own:
13339     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13340
13341     if self.op.target_groups:
13342       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13343                                   self.op.target_groups)
13344     else:
13345       self.req_target_uuids = []
13346
13347     if self.group_uuid in self.req_target_uuids:
13348       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13349                                  " as a target group (targets are %s)" %
13350                                  (self.group_uuid,
13351                                   utils.CommaJoin(self.req_target_uuids)),
13352                                  errors.ECODE_INVAL)
13353
13354     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13355
13356     self.share_locks = _ShareAll()
13357     self.needed_locks = {
13358       locking.LEVEL_INSTANCE: [],
13359       locking.LEVEL_NODEGROUP: [],
13360       locking.LEVEL_NODE: [],
13361       }
13362
13363   def DeclareLocks(self, level):
13364     if level == locking.LEVEL_INSTANCE:
13365       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13366
13367       # Lock instances optimistically, needs verification once node and group
13368       # locks have been acquired
13369       self.needed_locks[locking.LEVEL_INSTANCE] = \
13370         self.cfg.GetNodeGroupInstances(self.group_uuid)
13371
13372     elif level == locking.LEVEL_NODEGROUP:
13373       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13374
13375       if self.req_target_uuids:
13376         lock_groups = set([self.group_uuid] + self.req_target_uuids)
13377
13378         # Lock all groups used by instances optimistically; this requires going
13379         # via the node before it's locked, requiring verification later on
13380         lock_groups.update(group_uuid
13381                            for instance_name in
13382                              self.owned_locks(locking.LEVEL_INSTANCE)
13383                            for group_uuid in
13384                              self.cfg.GetInstanceNodeGroups(instance_name))
13385       else:
13386         # No target groups, need to lock all of them
13387         lock_groups = locking.ALL_SET
13388
13389       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13390
13391     elif level == locking.LEVEL_NODE:
13392       # This will only lock the nodes in the group to be evacuated which
13393       # contain actual instances
13394       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13395       self._LockInstancesNodes()
13396
13397       # Lock all nodes in group to be evacuated and target groups
13398       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13399       assert self.group_uuid in owned_groups
13400       member_nodes = [node_name
13401                       for group in owned_groups
13402                       for node_name in self.cfg.GetNodeGroup(group).members]
13403       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13404
13405   def CheckPrereq(self):
13406     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13407     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13408     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13409
13410     assert owned_groups.issuperset(self.req_target_uuids)
13411     assert self.group_uuid in owned_groups
13412
13413     # Check if locked instances are still correct
13414     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13415
13416     # Get instance information
13417     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13418
13419     # Check if node groups for locked instances are still correct
13420     for instance_name in owned_instances:
13421       inst = self.instances[instance_name]
13422       assert owned_nodes.issuperset(inst.all_nodes), \
13423         "Instance %s's nodes changed while we kept the lock" % instance_name
13424
13425       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13426                                              owned_groups)
13427
13428       assert self.group_uuid in inst_groups, \
13429         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13430
13431     if self.req_target_uuids:
13432       # User requested specific target groups
13433       self.target_uuids = self.req_target_uuids
13434     else:
13435       # All groups except the one to be evacuated are potential targets
13436       self.target_uuids = [group_uuid for group_uuid in owned_groups
13437                            if group_uuid != self.group_uuid]
13438
13439       if not self.target_uuids:
13440         raise errors.OpPrereqError("There are no possible target groups",
13441                                    errors.ECODE_INVAL)
13442
13443   def BuildHooksEnv(self):
13444     """Build hooks env.
13445
13446     """
13447     return {
13448       "GROUP_NAME": self.op.group_name,
13449       "TARGET_GROUPS": " ".join(self.target_uuids),
13450       }
13451
13452   def BuildHooksNodes(self):
13453     """Build hooks nodes.
13454
13455     """
13456     mn = self.cfg.GetMasterNode()
13457
13458     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13459
13460     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13461
13462     return (run_nodes, run_nodes)
13463
13464   def Exec(self, feedback_fn):
13465     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13466
13467     assert self.group_uuid not in self.target_uuids
13468
13469     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13470                      instances=instances, target_groups=self.target_uuids)
13471
13472     ial.Run(self.op.iallocator)
13473
13474     if not ial.success:
13475       raise errors.OpPrereqError("Can't compute group evacuation using"
13476                                  " iallocator '%s': %s" %
13477                                  (self.op.iallocator, ial.info),
13478                                  errors.ECODE_NORES)
13479
13480     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13481
13482     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13483                  len(jobs), self.op.group_name)
13484
13485     return ResultWithJobs(jobs)
13486
13487
13488 class TagsLU(NoHooksLU): # pylint: disable=W0223
13489   """Generic tags LU.
13490
13491   This is an abstract class which is the parent of all the other tags LUs.
13492
13493   """
13494   def ExpandNames(self):
13495     self.group_uuid = None
13496     self.needed_locks = {}
13497     if self.op.kind == constants.TAG_NODE:
13498       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13499       self.needed_locks[locking.LEVEL_NODE] = self.op.name
13500     elif self.op.kind == constants.TAG_INSTANCE:
13501       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13502       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13503     elif self.op.kind == constants.TAG_NODEGROUP:
13504       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13505
13506     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13507     # not possible to acquire the BGL based on opcode parameters)
13508
13509   def CheckPrereq(self):
13510     """Check prerequisites.
13511
13512     """
13513     if self.op.kind == constants.TAG_CLUSTER:
13514       self.target = self.cfg.GetClusterInfo()
13515     elif self.op.kind == constants.TAG_NODE:
13516       self.target = self.cfg.GetNodeInfo(self.op.name)
13517     elif self.op.kind == constants.TAG_INSTANCE:
13518       self.target = self.cfg.GetInstanceInfo(self.op.name)
13519     elif self.op.kind == constants.TAG_NODEGROUP:
13520       self.target = self.cfg.GetNodeGroup(self.group_uuid)
13521     else:
13522       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13523                                  str(self.op.kind), errors.ECODE_INVAL)
13524
13525
13526 class LUTagsGet(TagsLU):
13527   """Returns the tags of a given object.
13528
13529   """
13530   REQ_BGL = False
13531
13532   def ExpandNames(self):
13533     TagsLU.ExpandNames(self)
13534
13535     # Share locks as this is only a read operation
13536     self.share_locks = _ShareAll()
13537
13538   def Exec(self, feedback_fn):
13539     """Returns the tag list.
13540
13541     """
13542     return list(self.target.GetTags())
13543
13544
13545 class LUTagsSearch(NoHooksLU):
13546   """Searches the tags for a given pattern.
13547
13548   """
13549   REQ_BGL = False
13550
13551   def ExpandNames(self):
13552     self.needed_locks = {}
13553
13554   def CheckPrereq(self):
13555     """Check prerequisites.
13556
13557     This checks the pattern passed for validity by compiling it.
13558
13559     """
13560     try:
13561       self.re = re.compile(self.op.pattern)
13562     except re.error, err:
13563       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13564                                  (self.op.pattern, err), errors.ECODE_INVAL)
13565
13566   def Exec(self, feedback_fn):
13567     """Returns the tag list.
13568
13569     """
13570     cfg = self.cfg
13571     tgts = [("/cluster", cfg.GetClusterInfo())]
13572     ilist = cfg.GetAllInstancesInfo().values()
13573     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13574     nlist = cfg.GetAllNodesInfo().values()
13575     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13576     tgts.extend(("/nodegroup/%s" % n.name, n)
13577                 for n in cfg.GetAllNodeGroupsInfo().values())
13578     results = []
13579     for path, target in tgts:
13580       for tag in target.GetTags():
13581         if self.re.search(tag):
13582           results.append((path, tag))
13583     return results
13584
13585
13586 class LUTagsSet(TagsLU):
13587   """Sets a tag on a given object.
13588
13589   """
13590   REQ_BGL = False
13591
13592   def CheckPrereq(self):
13593     """Check prerequisites.
13594
13595     This checks the type and length of the tag name and value.
13596
13597     """
13598     TagsLU.CheckPrereq(self)
13599     for tag in self.op.tags:
13600       objects.TaggableObject.ValidateTag(tag)
13601
13602   def Exec(self, feedback_fn):
13603     """Sets the tag.
13604
13605     """
13606     try:
13607       for tag in self.op.tags:
13608         self.target.AddTag(tag)
13609     except errors.TagError, err:
13610       raise errors.OpExecError("Error while setting tag: %s" % str(err))
13611     self.cfg.Update(self.target, feedback_fn)
13612
13613
13614 class LUTagsDel(TagsLU):
13615   """Delete a list of tags from a given object.
13616
13617   """
13618   REQ_BGL = False
13619
13620   def CheckPrereq(self):
13621     """Check prerequisites.
13622
13623     This checks that we have the given tag.
13624
13625     """
13626     TagsLU.CheckPrereq(self)
13627     for tag in self.op.tags:
13628       objects.TaggableObject.ValidateTag(tag)
13629     del_tags = frozenset(self.op.tags)
13630     cur_tags = self.target.GetTags()
13631
13632     diff_tags = del_tags - cur_tags
13633     if diff_tags:
13634       diff_names = ("'%s'" % i for i in sorted(diff_tags))
13635       raise errors.OpPrereqError("Tag(s) %s not found" %
13636                                  (utils.CommaJoin(diff_names), ),
13637                                  errors.ECODE_NOENT)
13638
13639   def Exec(self, feedback_fn):
13640     """Remove the tag from the object.
13641
13642     """
13643     for tag in self.op.tags:
13644       self.target.RemoveTag(tag)
13645     self.cfg.Update(self.target, feedback_fn)
13646
13647
13648 class LUTestDelay(NoHooksLU):
13649   """Sleep for a specified amount of time.
13650
13651   This LU sleeps on the master and/or nodes for a specified amount of
13652   time.
13653
13654   """
13655   REQ_BGL = False
13656
13657   def ExpandNames(self):
13658     """Expand names and set required locks.
13659
13660     This expands the node list, if any.
13661
13662     """
13663     self.needed_locks = {}
13664     if self.op.on_nodes:
13665       # _GetWantedNodes can be used here, but is not always appropriate to use
13666       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13667       # more information.
13668       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13669       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13670
13671   def _TestDelay(self):
13672     """Do the actual sleep.
13673
13674     """
13675     if self.op.on_master:
13676       if not utils.TestDelay(self.op.duration):
13677         raise errors.OpExecError("Error during master delay test")
13678     if self.op.on_nodes:
13679       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13680       for node, node_result in result.items():
13681         node_result.Raise("Failure during rpc call to node %s" % node)
13682
13683   def Exec(self, feedback_fn):
13684     """Execute the test delay opcode, with the wanted repetitions.
13685
13686     """
13687     if self.op.repeat == 0:
13688       self._TestDelay()
13689     else:
13690       top_value = self.op.repeat - 1
13691       for i in range(self.op.repeat):
13692         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13693         self._TestDelay()
13694
13695
13696 class LUTestJqueue(NoHooksLU):
13697   """Utility LU to test some aspects of the job queue.
13698
13699   """
13700   REQ_BGL = False
13701
13702   # Must be lower than default timeout for WaitForJobChange to see whether it
13703   # notices changed jobs
13704   _CLIENT_CONNECT_TIMEOUT = 20.0
13705   _CLIENT_CONFIRM_TIMEOUT = 60.0
13706
13707   @classmethod
13708   def _NotifyUsingSocket(cls, cb, errcls):
13709     """Opens a Unix socket and waits for another program to connect.
13710
13711     @type cb: callable
13712     @param cb: Callback to send socket name to client
13713     @type errcls: class
13714     @param errcls: Exception class to use for errors
13715
13716     """
13717     # Using a temporary directory as there's no easy way to create temporary
13718     # sockets without writing a custom loop around tempfile.mktemp and
13719     # socket.bind
13720     tmpdir = tempfile.mkdtemp()
13721     try:
13722       tmpsock = utils.PathJoin(tmpdir, "sock")
13723
13724       logging.debug("Creating temporary socket at %s", tmpsock)
13725       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13726       try:
13727         sock.bind(tmpsock)
13728         sock.listen(1)
13729
13730         # Send details to client
13731         cb(tmpsock)
13732
13733         # Wait for client to connect before continuing
13734         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13735         try:
13736           (conn, _) = sock.accept()
13737         except socket.error, err:
13738           raise errcls("Client didn't connect in time (%s)" % err)
13739       finally:
13740         sock.close()
13741     finally:
13742       # Remove as soon as client is connected
13743       shutil.rmtree(tmpdir)
13744
13745     # Wait for client to close
13746     try:
13747       try:
13748         # pylint: disable=E1101
13749         # Instance of '_socketobject' has no ... member
13750         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13751         conn.recv(1)
13752       except socket.error, err:
13753         raise errcls("Client failed to confirm notification (%s)" % err)
13754     finally:
13755       conn.close()
13756
13757   def _SendNotification(self, test, arg, sockname):
13758     """Sends a notification to the client.
13759
13760     @type test: string
13761     @param test: Test name
13762     @param arg: Test argument (depends on test)
13763     @type sockname: string
13764     @param sockname: Socket path
13765
13766     """
13767     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13768
13769   def _Notify(self, prereq, test, arg):
13770     """Notifies the client of a test.
13771
13772     @type prereq: bool
13773     @param prereq: Whether this is a prereq-phase test
13774     @type test: string
13775     @param test: Test name
13776     @param arg: Test argument (depends on test)
13777
13778     """
13779     if prereq:
13780       errcls = errors.OpPrereqError
13781     else:
13782       errcls = errors.OpExecError
13783
13784     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13785                                                   test, arg),
13786                                    errcls)
13787
13788   def CheckArguments(self):
13789     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13790     self.expandnames_calls = 0
13791
13792   def ExpandNames(self):
13793     checkargs_calls = getattr(self, "checkargs_calls", 0)
13794     if checkargs_calls < 1:
13795       raise errors.ProgrammerError("CheckArguments was not called")
13796
13797     self.expandnames_calls += 1
13798
13799     if self.op.notify_waitlock:
13800       self._Notify(True, constants.JQT_EXPANDNAMES, None)
13801
13802     self.LogInfo("Expanding names")
13803
13804     # Get lock on master node (just to get a lock, not for a particular reason)
13805     self.needed_locks = {
13806       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13807       }
13808
13809   def Exec(self, feedback_fn):
13810     if self.expandnames_calls < 1:
13811       raise errors.ProgrammerError("ExpandNames was not called")
13812
13813     if self.op.notify_exec:
13814       self._Notify(False, constants.JQT_EXEC, None)
13815
13816     self.LogInfo("Executing")
13817
13818     if self.op.log_messages:
13819       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13820       for idx, msg in enumerate(self.op.log_messages):
13821         self.LogInfo("Sending log message %s", idx + 1)
13822         feedback_fn(constants.JQT_MSGPREFIX + msg)
13823         # Report how many test messages have been sent
13824         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13825
13826     if self.op.fail:
13827       raise errors.OpExecError("Opcode failure was requested")
13828
13829     return True
13830
13831
13832 class IAllocator(object):
13833   """IAllocator framework.
13834
13835   An IAllocator instance has three sets of attributes:
13836     - cfg that is needed to query the cluster
13837     - input data (all members of the _KEYS class attribute are required)
13838     - four buffer attributes (in|out_data|text), that represent the
13839       input (to the external script) in text and data structure format,
13840       and the output from it, again in two formats
13841     - the result variables from the script (success, info, nodes) for
13842       easy usage
13843
13844   """
13845   # pylint: disable=R0902
13846   # lots of instance attributes
13847
13848   def __init__(self, cfg, rpc_runner, mode, **kwargs):
13849     self.cfg = cfg
13850     self.rpc = rpc_runner
13851     # init buffer variables
13852     self.in_text = self.out_text = self.in_data = self.out_data = None
13853     # init all input fields so that pylint is happy
13854     self.mode = mode
13855     self.memory = self.disks = self.disk_template = None
13856     self.os = self.tags = self.nics = self.vcpus = None
13857     self.hypervisor = None
13858     self.relocate_from = None
13859     self.name = None
13860     self.instances = None
13861     self.evac_mode = None
13862     self.target_groups = []
13863     # computed fields
13864     self.required_nodes = None
13865     # init result fields
13866     self.success = self.info = self.result = None
13867
13868     try:
13869       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13870     except KeyError:
13871       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13872                                    " IAllocator" % self.mode)
13873
13874     keyset = [n for (n, _) in keydata]
13875
13876     for key in kwargs:
13877       if key not in keyset:
13878         raise errors.ProgrammerError("Invalid input parameter '%s' to"
13879                                      " IAllocator" % key)
13880       setattr(self, key, kwargs[key])
13881
13882     for key in keyset:
13883       if key not in kwargs:
13884         raise errors.ProgrammerError("Missing input parameter '%s' to"
13885                                      " IAllocator" % key)
13886     self._BuildInputData(compat.partial(fn, self), keydata)
13887
13888   def _ComputeClusterData(self):
13889     """Compute the generic allocator input data.
13890
13891     This is the data that is independent of the actual operation.
13892
13893     """
13894     cfg = self.cfg
13895     cluster_info = cfg.GetClusterInfo()
13896     # cluster data
13897     data = {
13898       "version": constants.IALLOCATOR_VERSION,
13899       "cluster_name": cfg.GetClusterName(),
13900       "cluster_tags": list(cluster_info.GetTags()),
13901       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13902       # we don't have job IDs
13903       }
13904     ninfo = cfg.GetAllNodesInfo()
13905     iinfo = cfg.GetAllInstancesInfo().values()
13906     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13907
13908     # node data
13909     node_list = [n.name for n in ninfo.values() if n.vm_capable]
13910
13911     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13912       hypervisor_name = self.hypervisor
13913     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13914       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13915     else:
13916       hypervisor_name = cluster_info.primary_hypervisor
13917
13918     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13919                                         [hypervisor_name])
13920     node_iinfo = \
13921       self.rpc.call_all_instances_info(node_list,
13922                                        cluster_info.enabled_hypervisors)
13923
13924     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13925
13926     config_ndata = self._ComputeBasicNodeData(ninfo)
13927     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13928                                                  i_list, config_ndata)
13929     assert len(data["nodes"]) == len(ninfo), \
13930         "Incomplete node data computed"
13931
13932     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13933
13934     self.in_data = data
13935
13936   @staticmethod
13937   def _ComputeNodeGroupData(cfg):
13938     """Compute node groups data.
13939
13940     """
13941     ng = dict((guuid, {
13942       "name": gdata.name,
13943       "alloc_policy": gdata.alloc_policy,
13944       })
13945       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13946
13947     return ng
13948
13949   @staticmethod
13950   def _ComputeBasicNodeData(node_cfg):
13951     """Compute global node data.
13952
13953     @rtype: dict
13954     @returns: a dict of name: (node dict, node config)
13955
13956     """
13957     # fill in static (config-based) values
13958     node_results = dict((ninfo.name, {
13959       "tags": list(ninfo.GetTags()),
13960       "primary_ip": ninfo.primary_ip,
13961       "secondary_ip": ninfo.secondary_ip,
13962       "offline": ninfo.offline,
13963       "drained": ninfo.drained,
13964       "master_candidate": ninfo.master_candidate,
13965       "group": ninfo.group,
13966       "master_capable": ninfo.master_capable,
13967       "vm_capable": ninfo.vm_capable,
13968       })
13969       for ninfo in node_cfg.values())
13970
13971     return node_results
13972
13973   @staticmethod
13974   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13975                               node_results):
13976     """Compute global node data.
13977
13978     @param node_results: the basic node structures as filled from the config
13979
13980     """
13981     #TODO(dynmem): compute the right data on MAX and MIN memory
13982     # make a copy of the current dict
13983     node_results = dict(node_results)
13984     for nname, nresult in node_data.items():
13985       assert nname in node_results, "Missing basic data for node %s" % nname
13986       ninfo = node_cfg[nname]
13987
13988       if not (ninfo.offline or ninfo.drained):
13989         nresult.Raise("Can't get data for node %s" % nname)
13990         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13991                                 nname)
13992         remote_info = _MakeLegacyNodeInfo(nresult.payload)
13993
13994         for attr in ["memory_total", "memory_free", "memory_dom0",
13995                      "vg_size", "vg_free", "cpu_total"]:
13996           if attr not in remote_info:
13997             raise errors.OpExecError("Node '%s' didn't return attribute"
13998                                      " '%s'" % (nname, attr))
13999           if not isinstance(remote_info[attr], int):
14000             raise errors.OpExecError("Node '%s' returned invalid value"
14001                                      " for '%s': %s" %
14002                                      (nname, attr, remote_info[attr]))
14003         # compute memory used by primary instances
14004         i_p_mem = i_p_up_mem = 0
14005         for iinfo, beinfo in i_list:
14006           if iinfo.primary_node == nname:
14007             i_p_mem += beinfo[constants.BE_MAXMEM]
14008             if iinfo.name not in node_iinfo[nname].payload:
14009               i_used_mem = 0
14010             else:
14011               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14012             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14013             remote_info["memory_free"] -= max(0, i_mem_diff)
14014
14015             if iinfo.admin_state == constants.ADMINST_UP:
14016               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14017
14018         # compute memory used by instances
14019         pnr_dyn = {
14020           "total_memory": remote_info["memory_total"],
14021           "reserved_memory": remote_info["memory_dom0"],
14022           "free_memory": remote_info["memory_free"],
14023           "total_disk": remote_info["vg_size"],
14024           "free_disk": remote_info["vg_free"],
14025           "total_cpus": remote_info["cpu_total"],
14026           "i_pri_memory": i_p_mem,
14027           "i_pri_up_memory": i_p_up_mem,
14028           }
14029         pnr_dyn.update(node_results[nname])
14030         node_results[nname] = pnr_dyn
14031
14032     return node_results
14033
14034   @staticmethod
14035   def _ComputeInstanceData(cluster_info, i_list):
14036     """Compute global instance data.
14037
14038     """
14039     instance_data = {}
14040     for iinfo, beinfo in i_list:
14041       nic_data = []
14042       for nic in iinfo.nics:
14043         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14044         nic_dict = {
14045           "mac": nic.mac,
14046           "ip": nic.ip,
14047           "mode": filled_params[constants.NIC_MODE],
14048           "link": filled_params[constants.NIC_LINK],
14049           }
14050         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14051           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14052         nic_data.append(nic_dict)
14053       pir = {
14054         "tags": list(iinfo.GetTags()),
14055         "admin_state": iinfo.admin_state,
14056         "vcpus": beinfo[constants.BE_VCPUS],
14057         "memory": beinfo[constants.BE_MAXMEM],
14058         "os": iinfo.os,
14059         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14060         "nics": nic_data,
14061         "disks": [{constants.IDISK_SIZE: dsk.size,
14062                    constants.IDISK_MODE: dsk.mode}
14063                   for dsk in iinfo.disks],
14064         "disk_template": iinfo.disk_template,
14065         "hypervisor": iinfo.hypervisor,
14066         }
14067       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14068                                                  pir["disks"])
14069       instance_data[iinfo.name] = pir
14070
14071     return instance_data
14072
14073   def _AddNewInstance(self):
14074     """Add new instance data to allocator structure.
14075
14076     This in combination with _AllocatorGetClusterData will create the
14077     correct structure needed as input for the allocator.
14078
14079     The checks for the completeness of the opcode must have already been
14080     done.
14081
14082     """
14083     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14084
14085     if self.disk_template in constants.DTS_INT_MIRROR:
14086       self.required_nodes = 2
14087     else:
14088       self.required_nodes = 1
14089
14090     request = {
14091       "name": self.name,
14092       "disk_template": self.disk_template,
14093       "tags": self.tags,
14094       "os": self.os,
14095       "vcpus": self.vcpus,
14096       "memory": self.memory,
14097       "disks": self.disks,
14098       "disk_space_total": disk_space,
14099       "nics": self.nics,
14100       "required_nodes": self.required_nodes,
14101       "hypervisor": self.hypervisor,
14102       }
14103
14104     return request
14105
14106   def _AddRelocateInstance(self):
14107     """Add relocate instance data to allocator structure.
14108
14109     This in combination with _IAllocatorGetClusterData will create the
14110     correct structure needed as input for the allocator.
14111
14112     The checks for the completeness of the opcode must have already been
14113     done.
14114
14115     """
14116     instance = self.cfg.GetInstanceInfo(self.name)
14117     if instance is None:
14118       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14119                                    " IAllocator" % self.name)
14120
14121     if instance.disk_template not in constants.DTS_MIRRORED:
14122       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14123                                  errors.ECODE_INVAL)
14124
14125     if instance.disk_template in constants.DTS_INT_MIRROR and \
14126         len(instance.secondary_nodes) != 1:
14127       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14128                                  errors.ECODE_STATE)
14129
14130     self.required_nodes = 1
14131     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14132     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14133
14134     request = {
14135       "name": self.name,
14136       "disk_space_total": disk_space,
14137       "required_nodes": self.required_nodes,
14138       "relocate_from": self.relocate_from,
14139       }
14140     return request
14141
14142   def _AddNodeEvacuate(self):
14143     """Get data for node-evacuate requests.
14144
14145     """
14146     return {
14147       "instances": self.instances,
14148       "evac_mode": self.evac_mode,
14149       }
14150
14151   def _AddChangeGroup(self):
14152     """Get data for node-evacuate requests.
14153
14154     """
14155     return {
14156       "instances": self.instances,
14157       "target_groups": self.target_groups,
14158       }
14159
14160   def _BuildInputData(self, fn, keydata):
14161     """Build input data structures.
14162
14163     """
14164     self._ComputeClusterData()
14165
14166     request = fn()
14167     request["type"] = self.mode
14168     for keyname, keytype in keydata:
14169       if keyname not in request:
14170         raise errors.ProgrammerError("Request parameter %s is missing" %
14171                                      keyname)
14172       val = request[keyname]
14173       if not keytype(val):
14174         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14175                                      " validation, value %s, expected"
14176                                      " type %s" % (keyname, val, keytype))
14177     self.in_data["request"] = request
14178
14179     self.in_text = serializer.Dump(self.in_data)
14180
14181   _STRING_LIST = ht.TListOf(ht.TString)
14182   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14183      # pylint: disable=E1101
14184      # Class '...' has no 'OP_ID' member
14185      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14186                           opcodes.OpInstanceMigrate.OP_ID,
14187                           opcodes.OpInstanceReplaceDisks.OP_ID])
14188      })))
14189
14190   _NEVAC_MOVED = \
14191     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14192                        ht.TItems([ht.TNonEmptyString,
14193                                   ht.TNonEmptyString,
14194                                   ht.TListOf(ht.TNonEmptyString),
14195                                  ])))
14196   _NEVAC_FAILED = \
14197     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14198                        ht.TItems([ht.TNonEmptyString,
14199                                   ht.TMaybeString,
14200                                  ])))
14201   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14202                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14203
14204   _MODE_DATA = {
14205     constants.IALLOCATOR_MODE_ALLOC:
14206       (_AddNewInstance,
14207        [
14208         ("name", ht.TString),
14209         ("memory", ht.TInt),
14210         ("disks", ht.TListOf(ht.TDict)),
14211         ("disk_template", ht.TString),
14212         ("os", ht.TString),
14213         ("tags", _STRING_LIST),
14214         ("nics", ht.TListOf(ht.TDict)),
14215         ("vcpus", ht.TInt),
14216         ("hypervisor", ht.TString),
14217         ], ht.TList),
14218     constants.IALLOCATOR_MODE_RELOC:
14219       (_AddRelocateInstance,
14220        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14221        ht.TList),
14222      constants.IALLOCATOR_MODE_NODE_EVAC:
14223       (_AddNodeEvacuate, [
14224         ("instances", _STRING_LIST),
14225         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14226         ], _NEVAC_RESULT),
14227      constants.IALLOCATOR_MODE_CHG_GROUP:
14228       (_AddChangeGroup, [
14229         ("instances", _STRING_LIST),
14230         ("target_groups", _STRING_LIST),
14231         ], _NEVAC_RESULT),
14232     }
14233
14234   def Run(self, name, validate=True, call_fn=None):
14235     """Run an instance allocator and return the results.
14236
14237     """
14238     if call_fn is None:
14239       call_fn = self.rpc.call_iallocator_runner
14240
14241     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14242     result.Raise("Failure while running the iallocator script")
14243
14244     self.out_text = result.payload
14245     if validate:
14246       self._ValidateResult()
14247
14248   def _ValidateResult(self):
14249     """Process the allocator results.
14250
14251     This will process and if successful save the result in
14252     self.out_data and the other parameters.
14253
14254     """
14255     try:
14256       rdict = serializer.Load(self.out_text)
14257     except Exception, err:
14258       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14259
14260     if not isinstance(rdict, dict):
14261       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14262
14263     # TODO: remove backwards compatiblity in later versions
14264     if "nodes" in rdict and "result" not in rdict:
14265       rdict["result"] = rdict["nodes"]
14266       del rdict["nodes"]
14267
14268     for key in "success", "info", "result":
14269       if key not in rdict:
14270         raise errors.OpExecError("Can't parse iallocator results:"
14271                                  " missing key '%s'" % key)
14272       setattr(self, key, rdict[key])
14273
14274     if not self._result_check(self.result):
14275       raise errors.OpExecError("Iallocator returned invalid result,"
14276                                " expected %s, got %s" %
14277                                (self._result_check, self.result),
14278                                errors.ECODE_INVAL)
14279
14280     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14281       assert self.relocate_from is not None
14282       assert self.required_nodes == 1
14283
14284       node2group = dict((name, ndata["group"])
14285                         for (name, ndata) in self.in_data["nodes"].items())
14286
14287       fn = compat.partial(self._NodesToGroups, node2group,
14288                           self.in_data["nodegroups"])
14289
14290       instance = self.cfg.GetInstanceInfo(self.name)
14291       request_groups = fn(self.relocate_from + [instance.primary_node])
14292       result_groups = fn(rdict["result"] + [instance.primary_node])
14293
14294       if self.success and not set(result_groups).issubset(request_groups):
14295         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14296                                  " differ from original groups (%s)" %
14297                                  (utils.CommaJoin(result_groups),
14298                                   utils.CommaJoin(request_groups)))
14299
14300     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14301       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14302
14303     self.out_data = rdict
14304
14305   @staticmethod
14306   def _NodesToGroups(node2group, groups, nodes):
14307     """Returns a list of unique group names for a list of nodes.
14308
14309     @type node2group: dict
14310     @param node2group: Map from node name to group UUID
14311     @type groups: dict
14312     @param groups: Group information
14313     @type nodes: list
14314     @param nodes: Node names
14315
14316     """
14317     result = set()
14318
14319     for node in nodes:
14320       try:
14321         group_uuid = node2group[node]
14322       except KeyError:
14323         # Ignore unknown node
14324         pass
14325       else:
14326         try:
14327           group = groups[group_uuid]
14328         except KeyError:
14329           # Can't find group, let's use UUID
14330           group_name = group_uuid
14331         else:
14332           group_name = group["name"]
14333
14334         result.add(group_name)
14335
14336     return sorted(result)
14337
14338
14339 class LUTestAllocator(NoHooksLU):
14340   """Run allocator tests.
14341
14342   This LU runs the allocator tests
14343
14344   """
14345   def CheckPrereq(self):
14346     """Check prerequisites.
14347
14348     This checks the opcode parameters depending on the director and mode test.
14349
14350     """
14351     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14352       for attr in ["memory", "disks", "disk_template",
14353                    "os", "tags", "nics", "vcpus"]:
14354         if not hasattr(self.op, attr):
14355           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14356                                      attr, errors.ECODE_INVAL)
14357       iname = self.cfg.ExpandInstanceName(self.op.name)
14358       if iname is not None:
14359         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14360                                    iname, errors.ECODE_EXISTS)
14361       if not isinstance(self.op.nics, list):
14362         raise errors.OpPrereqError("Invalid parameter 'nics'",
14363                                    errors.ECODE_INVAL)
14364       if not isinstance(self.op.disks, list):
14365         raise errors.OpPrereqError("Invalid parameter 'disks'",
14366                                    errors.ECODE_INVAL)
14367       for row in self.op.disks:
14368         if (not isinstance(row, dict) or
14369             constants.IDISK_SIZE not in row or
14370             not isinstance(row[constants.IDISK_SIZE], int) or
14371             constants.IDISK_MODE not in row or
14372             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14373           raise errors.OpPrereqError("Invalid contents of the 'disks'"
14374                                      " parameter", errors.ECODE_INVAL)
14375       if self.op.hypervisor is None:
14376         self.op.hypervisor = self.cfg.GetHypervisorType()
14377     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14378       fname = _ExpandInstanceName(self.cfg, self.op.name)
14379       self.op.name = fname
14380       self.relocate_from = \
14381           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14382     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14383                           constants.IALLOCATOR_MODE_NODE_EVAC):
14384       if not self.op.instances:
14385         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14386       self.op.instances = _GetWantedInstances(self, self.op.instances)
14387     else:
14388       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14389                                  self.op.mode, errors.ECODE_INVAL)
14390
14391     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14392       if self.op.allocator is None:
14393         raise errors.OpPrereqError("Missing allocator name",
14394                                    errors.ECODE_INVAL)
14395     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14396       raise errors.OpPrereqError("Wrong allocator test '%s'" %
14397                                  self.op.direction, errors.ECODE_INVAL)
14398
14399   def Exec(self, feedback_fn):
14400     """Run the allocator test.
14401
14402     """
14403     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14404       ial = IAllocator(self.cfg, self.rpc,
14405                        mode=self.op.mode,
14406                        name=self.op.name,
14407                        memory=self.op.memory,
14408                        disks=self.op.disks,
14409                        disk_template=self.op.disk_template,
14410                        os=self.op.os,
14411                        tags=self.op.tags,
14412                        nics=self.op.nics,
14413                        vcpus=self.op.vcpus,
14414                        hypervisor=self.op.hypervisor,
14415                        )
14416     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14417       ial = IAllocator(self.cfg, self.rpc,
14418                        mode=self.op.mode,
14419                        name=self.op.name,
14420                        relocate_from=list(self.relocate_from),
14421                        )
14422     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14423       ial = IAllocator(self.cfg, self.rpc,
14424                        mode=self.op.mode,
14425                        instances=self.op.instances,
14426                        target_groups=self.op.target_groups)
14427     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14428       ial = IAllocator(self.cfg, self.rpc,
14429                        mode=self.op.mode,
14430                        instances=self.op.instances,
14431                        evac_mode=self.op.evac_mode)
14432     else:
14433       raise errors.ProgrammerError("Uncatched mode %s in"
14434                                    " LUTestAllocator.Exec", self.op.mode)
14435
14436     if self.op.direction == constants.IALLOCATOR_DIR_IN:
14437       result = ial.in_text
14438     else:
14439       ial.Run(self.op.allocator, validate=False)
14440       result = ial.out_text
14441     return result
14442
14443
14444 #: Query type implementations
14445 _QUERY_IMPL = {
14446   constants.QR_INSTANCE: _InstanceQuery,
14447   constants.QR_NODE: _NodeQuery,
14448   constants.QR_GROUP: _GroupQuery,
14449   constants.QR_OS: _OsQuery,
14450   }
14451
14452 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14453
14454
14455 def _GetQueryImplementation(name):
14456   """Returns the implemtnation for a query type.
14457
14458   @param name: Query type, must be one of L{constants.QR_VIA_OP}
14459
14460   """
14461   try:
14462     return _QUERY_IMPL[name]
14463   except KeyError:
14464     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14465                                errors.ECODE_INVAL)