code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
#: Size of DRBD meta block device
# NOTE(review): the unit is not stated here (presumably MiB) -- confirm
# against the code consuming this constant
DRBD_META_SIZE = 128

# States of instance: each list groups the admin states
# (C{constants.ADMINST_*}) matching the condition named by the constant
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
# "online" covers both the down and the up admin state
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
# "not running" covers both the down and the offline admin state
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @ivar needed_locks: locking needs of the LU; C{None} until L{ExpandNames}
      has set it to a dict
  @ivar share_locks: dict keyed by lock level; all levels start at 0 (not
      shared)
  @ivar tasklets: C{None}, or the list of tasklets which L{CheckPrereq} and
      L{Exec} run in order

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    @param processor: object providing the C{Log}, C{LogWarning}, C{LogInfo}
        and C{LogStep} callables, which are re-exported on the LU
    @param op: the opcode; its C{debug_level} is reset to 0 if missing or not
        an integer, and its C{Validate} method is called
    @param context: object providing the C{cfg} and C{glm} attributes
    @param rpc_runner: stored as C{self.rpc}

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    # Runs last, so subclass checks see the validated opcode
    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: if the LU does not require the BGL and has
        not overridden this method

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation runs C{CheckPrereq} of every tasklet in order,
    and does nothing if no tasklets are defined.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation runs C{Exec} of every tasklet in order,
    passing C{feedback_fn} on to each of them.

    @param feedback_fn: callable handed to each tasklet's C{Exec}
    @raise NotImplementedError: if no tasklets are defined and the method
        has not been overridden

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    If needed_locks is already a dict, it must not contain an entry for the
    instance level yet (this is checked with an assertion).

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[level] with the
    primary nodes (and, unless C{primary_only} is set, the secondary nodes)
    of all instances whose locks are currently owned.

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[level] is set. That value selects the behaviour:
    with L{constants.LOCKS_REPLACE} the node list replaces
    self.needed_locks[level], with L{constants.LOCKS_APPEND} it is appended
    to the existing list. The self.recalculate_locks[level] entry is removed
    afterwards.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes
    @raise errors.ProgrammerError: if self.recalculate_locks[level] holds an
        unknown recalculation mode

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # Collect the nodes of every instance whose lock we own; how they are
    # merged into self.needed_locks[level] depends on the recalculation mode
    # stored in self.recalculate_locks[level]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    # The recalculation request has been served
    del self.recalculate_locks[level]
 407
 408
 409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 410   """Simple LU which runs no hooks.
 411
 412   This LU is intended as a parent for other LogicalUnits which will
 413   run no hooks, in order to reduce duplicate code.
 414
 415   """
 416   HPATH = None
 417   HTYPE = None
 418
 419   def BuildHooksEnv(self):
 420     """Empty BuildHooksEnv for NoHooksLu.
 421
 422     This just raises an error.
 423
 424     """
 425     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 426
 427   def BuildHooksNodes(self):
 428     """Empty BuildHooksNodes for NoHooksLU.
 429
 430     """
 431     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 432
 433
 434 class Tasklet:
 435   """Tasklet base class.
 436
 437   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 438   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 439   tasklets know nothing about locks.
 440
 441   Subclasses must follow these rules:
 442     - Implement CheckPrereq
 443     - Implement Exec
 444
 445   """
 446   def __init__(self, lu):
 447     self.lu = lu
 448
 449     # Shortcuts
 450     self.cfg = lu.cfg
 451     self.rpc = lu.rpc
 452
 453   def CheckPrereq(self):
 454     """Check prerequisites for this tasklets.
 455
 456     This method should check whether the prerequisites for the execution of
 457     this tasklet are fulfilled. It can do internode communication, but it
 458     should be idempotent - no cluster or system changes are allowed.
 459
 460     The method should raise errors.OpPrereqError in case something is not
 461     fulfilled. Its return value is ignored.
 462
 463     This method should also update all parameters to their canonical form if it
 464     hasn't been done before.
 465
 466     """
 467     pass
 468
 469   def Exec(self, feedback_fn):
 470     """Execute the tasklet.
 471
 472     This method should implement the actual work. It should raise
 473     errors.OpExecError for failures that are somewhat dealt with in code, or
 474     expected.
 475
 476     """
 477     raise NotImplementedError
 478
 479
 480 class _QueryBase:
 481   """Base for query utility classes.
 482
 483   """
 484   #: Attribute holding field definitions
 485   FIELDS = None
 486
 487   def __init__(self, qfilter, fields, use_locking):
 488     """Initializes this class.
 489
 490     """
 491     self.use_locking = use_locking
 492
 493     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 494                              namefield="name")
 495     self.requested_data = self.query.RequestedData()
 496     self.names = self.query.RequestedNames()
 497
 498     # Sort only if no names were requested
 499     self.sort_by_name = not self.names
 500
 501     self.do_locking = None
 502     self.wanted = None
 503
 504   def _GetNames(self, lu, all_names, lock_level):
 505     """Helper function to determine names asked for in the query.
 506
 507     """
 508     if self.do_locking:
 509       names = lu.owned_locks(lock_level)
 510     else:
 511       names = all_names
 512
 513     if self.wanted == locking.ALL_SET:
 514       assert not self.names
 515       # caller didn't specify names, so ordering is not important
 516       return utils.NiceSort(names)
 517
 518     # caller specified names and we must keep the same order
 519     assert self.names
 520     assert not self.do_locking or lu.glm.is_owned(lock_level)
 521
 522     missing = set(self.wanted).difference(names)
 523     if missing:
 524       raise errors.OpExecError("Some items were removed before retrieving"
 525                                " their data: %s" % missing)
 526
 527     # Return expanded names
 528     return self.wanted
 529
 530   def ExpandNames(self, lu):
 531     """Expand names for this query.
 532
 533     See L{LogicalUnit.ExpandNames}.
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def DeclareLocks(self, lu, level):
 539     """Declare locks for this query.
 540
 541     See L{LogicalUnit.DeclareLocks}.
 542
 543     """
 544     raise NotImplementedError()
 545
 546   def _GetQueryData(self, lu):
 547     """Collects all data for this query.
 548
 549     @return: Query data object
 550
 551     """
 552     raise NotImplementedError()
 553
 554   def NewStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 559                                   sort_by_name=self.sort_by_name)
 560
 561   def OldStyleQuery(self, lu):
 562     """Collect data and execute query.
 563
 564     """
 565     return self.query.OldStyleQuery(self._GetQueryData(lu),
 566                                     sort_by_name=self.sort_by_name)
 567
 568
 569 def _ShareAll():
 570   """Returns a dict declaring all lock levels shared.
 571
 572   """
 573   return dict.fromkeys(locking.LEVELS, 1)
 574
 575
 576 def _MakeLegacyNodeInfo(data):
 577   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 578
 579   Converts the data into a single dictionary. This is fine for most use cases,
 580   but some require information from more than one volume group or hypervisor.
 581
 582   """
 583   (bootid, (vg_info, ), (hv_info, )) = data
 584
 585   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 586     "bootid": bootid,
 587     })
 588
 589
 590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 591   """Checks if the owned node groups are still correct for an instance.
 592
 593   @type cfg: L{config.ConfigWriter}
 594   @param cfg: The cluster configuration
 595   @type instance_name: string
 596   @param instance_name: Instance name
 597   @type owned_groups: set or frozenset
 598   @param owned_groups: List of currently owned node groups
 599
 600   """
 601   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 602
 603   if not owned_groups.issuperset(inst_groups):
 604     raise errors.OpPrereqError("Instance %s's node groups changed since"
 605                                " locks were acquired, current groups are"
 606                                " are '%s', owning groups '%s'; retry the"
 607                                " operation" %
 608                                (instance_name,
 609                                 utils.CommaJoin(inst_groups),
 610                                 utils.CommaJoin(owned_groups)),
 611                                errors.ECODE_STATE)
 612
 613   return inst_groups
 614
 615
 616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 617   """Checks if the instances in a node group are still correct.
 618
 619   @type cfg: L{config.ConfigWriter}
 620   @param cfg: The cluster configuration
 621   @type group_uuid: string
 622   @param group_uuid: Node group UUID
 623   @type owned_instances: set or frozenset
 624   @param owned_instances: List of currently owned instances
 625
 626   """
 627   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 628   if owned_instances != wanted_instances:
 629     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 630                                " locks were acquired, wanted '%s', have '%s';"
 631                                " retry the operation" %
 632                                (group_uuid,
 633                                 utils.CommaJoin(wanted_instances),
 634                                 utils.CommaJoin(owned_instances)),
 635                                errors.ECODE_STATE)
 636
 637   return wanted_instances
 638
 639
 640 def _SupportsOob(cfg, node):
 641   """Tells if node supports OOB.
 642
 643   @type cfg: L{config.ConfigWriter}
 644   @param cfg: The cluster configuration
 645   @type node: L{objects.Node}
 646   @param node: The node
 647   @return: The OOB script if supported or an empty string otherwise
 648
 649   """
 650   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 651
 652
 653 def _GetWantedNodes(lu, nodes):
 654   """Returns list of checked and expanded node names.
 655
 656   @type lu: L{LogicalUnit}
 657   @param lu: the logical unit on whose behalf we execute
 658   @type nodes: list
 659   @param nodes: list of node names or None for all nodes
 660   @rtype: list
 661   @return: the list of nodes, sorted
 662   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 663
 664   """
 665   if nodes:
 666     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 667
 668   return utils.NiceSort(lu.cfg.GetNodeList())
 669
 670
 671 def _GetWantedInstances(lu, instances):
 672   """Returns list of checked and expanded instance names.
 673
 674   @type lu: L{LogicalUnit}
 675   @param lu: the logical unit on whose behalf we execute
 676   @type instances: list
 677   @param instances: list of instance names or None for all instances
 678   @rtype: list
 679   @return: the list of instances, sorted
 680   @raise errors.OpPrereqError: if the instances parameter is wrong type
 681   @raise errors.OpPrereqError: if any of the passed instances is not found
 682
 683   """
 684   if instances:
 685     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 686   else:
 687     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 688   return wanted
 689
 690
 691 def _GetUpdatedParams(old_params, update_dict,
 692                       use_default=True, use_none=False):
 693   """Return the new version of a parameter dictionary.
 694
 695   @type old_params: dict
 696   @param old_params: old parameters
 697   @type update_dict: dict
 698   @param update_dict: dict containing new parameter values, or
 699       constants.VALUE_DEFAULT to reset the parameter to its default
 700       value
 701   @param use_default: boolean
 702   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 703       values as 'to be deleted' values
 704   @param use_none: boolean
 705   @type use_none: whether to recognise C{None} values as 'to be
 706       deleted' values
 707   @rtype: dict
 708   @return: the new parameter dictionary
 709
 710   """
 711   params_copy = copy.deepcopy(old_params)
 712   for key, val in update_dict.iteritems():
 713     if ((use_default and val == constants.VALUE_DEFAULT) or
 714         (use_none and val is None)):
 715       try:
 716         del params_copy[key]
 717       except KeyError:
 718         pass
 719     else:
 720       params_copy[key] = val
 721   return params_copy
 722
 723
 724 def _UpdateAndVerifySubDict(base, updates, type_check):
 725   """Updates and verifies a dict with sub dicts of the same type.
 726
 727   @param base: The dict with the old data
 728   @param updates: The dict with the new data
 729   @param type_check: Dict suitable to ForceDictType to verify correct types
 730   @returns: A new dict with updated and verified values
 731
 732   """
 733   def fn(old, value):
 734     new = _GetUpdatedParams(old, value)
 735     utils.ForceDictType(new, type_check)
 736     return new
 737
 738   ret = copy.deepcopy(base)
 739   ret.update(dict((key, fn(base.get(key, {}), value))
 740                   for key, value in updates.items()))
 741   return ret
 742
 743
 744 def _MergeAndVerifyHvState(op_input, obj_input):
 745   """Combines the hv state from an opcode with the one of the object
 746
 747   @param op_input: The input dict from the opcode
 748   @param obj_input: The input dict from the objects
 749   @return: The verified and updated dict
 750
 751   """
 752   if op_input:
 753     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 754     if invalid_hvs:
 755       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 756                                  " %s" % utils.CommaJoin(invalid_hvs),
 757                                  errors.ECODE_INVAL)
 758     if obj_input is None:
 759       obj_input = {}
 760     type_check = constants.HVSTS_PARAMETER_TYPES
 761     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 762
 763   return None
 764
 765
 766 def _MergeAndVerifyDiskState(op_input, obj_input):
 767   """Combines the disk state from an opcode with the one of the object
 768
 769   @param op_input: The input dict from the opcode
 770   @param obj_input: The input dict from the objects
 771   @return: The verified and updated dict
 772   """
 773   if op_input:
 774     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 775     if invalid_dst:
 776       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 777                                  utils.CommaJoin(invalid_dst),
 778                                  errors.ECODE_INVAL)
 779     type_check = constants.DSS_PARAMETER_TYPES
 780     if obj_input is None:
 781       obj_input = {}
 782     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 783                                               type_check))
 784                 for key, value in op_input.items())
 785
 786   return None
 787
 788
 789 def _ReleaseLocks(lu, level, names=None, keep=None):
 790   """Releases locks owned by an LU.
 791
 792   @type lu: L{LogicalUnit}
 793   @param level: Lock level
 794   @type names: list or None
 795   @param names: Names of locks to release
 796   @type keep: list or None
 797   @param keep: Names of locks to retain
 798
 799   """
 800   assert not (keep is not None and names is not None), \
 801          "Only one of the 'names' and the 'keep' parameters can be given"
 802
 803   if names is not None:
 804     should_release = names.__contains__
 805   elif keep:
 806     should_release = lambda name: name not in keep
 807   else:
 808     should_release = None
 809
 810   owned = lu.owned_locks(level)
 811   if not owned:
 812     # Not owning any lock at this level, do nothing
 813     pass
 814
 815   elif should_release:
 816     retain = []
 817     release = []
 818
 819     # Determine which locks to release
 820     for name in owned:
 821       if should_release(name):
 822         release.append(name)
 823       else:
 824         retain.append(name)
 825
 826     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 827
 828     # Release just some locks
 829     lu.glm.release(level, names=release)
 830
 831     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 832   else:
 833     # Release everything
 834     lu.glm.release(level)
 835
 836     assert not lu.glm.is_owned(level), "No locks should be owned"
 837
 838
 839 def _MapInstanceDisksToNodes(instances):
 840   """Creates a map from (node, volume) to instance name.
 841
 842   @type instances: list of L{objects.Instance}
 843   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 844
 845   """
 846   return dict(((node, vol), inst.name)
 847               for inst in instances
 848               for (node, vols) in inst.MapLVsByNode().items()
 849               for vol in vols)
 850
 851
 852 def _RunPostHook(lu, node_name):
 853   """Runs the post-hook for an opcode on a single node.
 854
 855   """
 856   hm = lu.proc.BuildHooksManager(lu)
 857   try:
 858     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 859   except:
 860     # pylint: disable=W0702
 861     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 862
 863
 864 def _CheckOutputFields(static, dynamic, selected):
 865   """Checks whether all selected fields are valid.
 866
 867   @type static: L{utils.FieldSet}
 868   @param static: static fields set
 869   @type dynamic: L{utils.FieldSet}
 870   @param dynamic: dynamic fields set
 871
 872   """
 873   f = utils.FieldSet()
 874   f.Extend(static)
 875   f.Extend(dynamic)
 876
 877   delta = f.NonMatching(selected)
 878   if delta:
 879     raise errors.OpPrereqError("Unknown output fields selected: %s"
 880                                % ",".join(delta), errors.ECODE_INVAL)
 881
 882
 883 def _CheckGlobalHvParams(params):
 884   """Validates that given hypervisor params are not global ones.
 885
 886   This will ensure that instances don't get customised versions of
 887   global params.
 888
 889   """
 890   used_globals = constants.HVC_GLOBALS.intersection(params)
 891   if used_globals:
 892     msg = ("The following hypervisor parameters are global and cannot"
 893            " be customized at instance level, please modify them at"
 894            " cluster level: %s" % utils.CommaJoin(used_globals))
 895     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 896
 897
 898 def _CheckNodeOnline(lu, node, msg=None):
 899   """Ensure that a given node is online.
 900
 901   @param lu: the LU on behalf of which we make the check
 902   @param node: the node to check
 903   @param msg: if passed, should be a message to replace the default one
 904   @raise errors.OpPrereqError: if the node is offline
 905
 906   """
 907   if msg is None:
 908     msg = "Can't use offline node"
 909   if lu.cfg.GetNodeInfo(node).offline:
 910     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 911
 912
 913 def _CheckNodeNotDrained(lu, node):
 914   """Ensure that a given node is not drained.
 915
 916   @param lu: the LU on behalf of which we make the check
 917   @param node: the node to check
 918   @raise errors.OpPrereqError: if the node is drained
 919
 920   """
 921   if lu.cfg.GetNodeInfo(node).drained:
 922     raise errors.OpPrereqError("Can't use drained node %s" % node,
 923                                errors.ECODE_STATE)
 924
 925
 926 def _CheckNodeVmCapable(lu, node):
 927   """Ensure that a given node is vm capable.
 928
 929   @param lu: the LU on behalf of which we make the check
 930   @param node: the node to check
 931   @raise errors.OpPrereqError: if the node is not vm capable
 932
 933   """
 934   if not lu.cfg.GetNodeInfo(node).vm_capable:
 935     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 936                                errors.ECODE_STATE)
 937
 938
 939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 940   """Ensure that a node supports a given OS.
 941
 942   @param lu: the LU on behalf of which we make the check
 943   @param node: the node to check
 944   @param os_name: the OS to query about
 945   @param force_variant: whether to ignore variant errors
 946   @raise errors.OpPrereqError: if the node is not supporting the OS
 947
 948   """
 949   result = lu.rpc.call_os_get(node, os_name)
 950   result.Raise("OS '%s' not in supported OS list for node %s" %
 951                (os_name, node),
 952                prereq=True, ecode=errors.ECODE_INVAL)
 953   if not force_variant:
 954     _CheckOSVariant(result.payload, os_name)
 955
 956
 957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 958   """Ensure that a node has the given secondary ip.
 959
 960   @type lu: L{LogicalUnit}
 961   @param lu: the LU on behalf of which we make the check
 962   @type node: string
 963   @param node: the node to check
 964   @type secondary_ip: string
 965   @param secondary_ip: the ip to check
 966   @type prereq: boolean
 967   @param prereq: whether to throw a prerequisite or an execute error
 968   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 969   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 970
 971   """
 972   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 973   result.Raise("Failure checking secondary ip on node %s" % node,
 974                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 975   if not result.payload:
 976     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 977            " please fix and re-run this command" % secondary_ip)
 978     if prereq:
 979       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 980     else:
 981       raise errors.OpExecError(msg)
 982
 983
 984 def _GetClusterDomainSecret():
 985   """Reads the cluster domain secret.
 986
 987   """
 988   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 989                                strict=True)
 990
 991
 992 def _CheckInstanceState(lu, instance, req_states, msg=None):
 993   """Ensure that an instance is in one of the required states.
 994
 995   @param lu: the LU on behalf of which we make the check
 996   @param instance: the instance to check
 997   @param msg: if passed, should be a message to replace the default one
 998   @raise errors.OpPrereqError: if the instance is not in the required state
 999
1000   """
1001   if msg is None:
1002     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003   if instance.admin_state not in req_states:
1004     raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005                                (instance, instance.admin_state, msg),
1006                                errors.ECODE_STATE)
1007
1008   if constants.ADMINST_UP not in req_states:
1009     pnode = instance.primary_node
1010     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012                 prereq=True, ecode=errors.ECODE_ENVIRON)
1013
1014     if instance.name in ins_l.payload:
1015       raise errors.OpPrereqError("Instance %s is running, %s" %
1016                                  (instance.name, msg), errors.ECODE_STATE)
1017
1018
def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks whether a value lies within the limits of an instance policy.

  A limit that is not defined for the parameter is treated as being equal
  to the value itself, i.e. it never causes a violation.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None if the value is acceptable (or is None/auto), otherwise a
      message describing the violated range

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)

  too_high = value > upper
  too_low = lower > value
  if too_high or too_low:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, lower, upper))

  return None
1037
1038
def _ExpandItemName(fn, name, kind):
  """Resolves an item name to its full form.

  @param fn: the function to use for expansion; it must return None for
      unknown names
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance'), used in the error
      message
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  resolved = fn(name)
  if resolved is not None:
    return resolved

  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
1054
1055
def _ExpandNodeName(cfg, name):
  """Expands a node name via L{_ExpandItemName} and the configuration."""
  expand_fn = cfg.ExpandNodeName
  return _ExpandItemName(expand_fn, name, "Node")
1059
1060
def _ExpandInstanceName(cfg, name):
  """Expands an instance name via L{_ExpandItemName} and the configuration."""
  expand_fn = cfg.ExpandInstanceName
  return _ExpandItemName(expand_fn, name, "Instance")
1064
1065
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Assembles the instance-related hook environment from plain values.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  # Missing/empty NIC, disk and tag lists are all handled as "no entries"
  nic_list = nics or []
  disk_list = disks or []

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    "INSTANCE_NIC_COUNT": len(nic_list),
    "INSTANCE_DISK_COUNT": len(disk_list),
    "INSTANCE_TAGS": " ".join(tags or []),
  }

  for (idx, (ip, mac, mode, link)) in enumerate(nic_list):
    if ip is None:
      ip = ""
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    # For bridged NICs the link is additionally exported as the bridge
    if mode == constants.NIC_MODE_BRIDGED:
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link

  for (idx, (size, mode)) in enumerate(disk_list):
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode

  for (kind, params) in (("BE", bep), ("HV", hvp)):
    env.update(dict(("INSTANCE_%s_%s" % (kind, key), value)
                    for (key, value) in params.items()))

  return env
1159
1160
def _NICListToTuple(lu, nics):
  """Converts NIC objects into (ip, mac, mode, link) tuples.

  The resulting list is suitable to be passed to _BuildInstanceHookEnv or
  as a return value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one (ip, mac, mode, link) tuple per NIC; mode and link are
      taken from the NIC parameters as filled by the cluster

  """
  cluster = lu.cfg.GetClusterInfo()

  def _ToTuple(nic):
    (ip, mac) = (nic.ip, nic.mac)
    filled = cluster.SimpleFillNIC(nic.nicparams)
    return (ip, mac, filled[constants.NIC_MODE], filled[constants.NIC_LINK])

  return [_ToTuple(nic) for nic in nics]
1183
1184
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Assembles the instance-related hook environment from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values; keys are argument names of L{_BuildInstanceHookEnv}
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  hook_args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=bep[constants.BE_MAXMEM],
    minmem=bep[constants.BE_MINMEM],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )

  if override:
    hook_args.update(override)

  return _BuildInstanceHookEnv(**hook_args) # pylint: disable=W0142
1223
1224
def _AdjustCandidatePool(lu, exceptions):
  """Brings the master candidate pool up to date after node operations.

  Nodes returned by the configuration's MaintainCandidatePool are logged
  as promoted and re-added through the LU's context. Afterwards a note is
  logged if there are more candidates than desired.

  @param lu: the LU on whose behalf we execute
  @param exceptions: passed through to the configuration's
      MaintainCandidatePool and GetMasterCandidateStats

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now <= mc_max:
    return

  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
1239
1240
def _DecideSelfPromotion(lu, exceptions=None):
  """Tells whether the node should promote itself to master candidate.

  @param lu: the LU on whose behalf we execute
  @param exceptions: passed through to the configuration's
      GetMasterCandidateStats
  @rtype: boolean
  @return: True if the current number of candidates is below the desired
      number, after accounting for one additional node

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)

  # The new node raises the desired count by one, capped by the pool size
  target = min(desired + 1, pool_size)

  return current < target
1250
1251
def _CalculateGroupIPolicy(cfg, group):
  """Computes the instance policy that applies to a node group.

  @param cfg: the configuration object, used to get the cluster object
  @param group: the node group whose C{ipolicy} is filled by the cluster
  @return: the result of the cluster's SimpleFillIPolicy for the group

  """
  group_policy = group.ipolicy
  return cfg.GetClusterInfo().SimpleFillIPolicy(group_policy)
1258
1259
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Checks that the bridges needed by a list of nics exist.

  Only NICs in bridged mode are considered; if there are none, the node
  is not contacted.

  @param lu: the LU on whose behalf we execute
  @param target_nics: the NICs whose bridges should be checked
  @param target_node: the node on which the bridges must exist

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = [cluster.SimpleFillNIC(nic.nicparams)
                   for nic in target_nics]

  bridges = []
  for params in filled_params:
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  check = lu.rpc.call_bridges_exist(target_node, bridges)
  check.Raise("Error checking bridges on destination node '%s'" %
              target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1272
1273
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Checks that the bridges needed by an instance exist.

  @param lu: the LU on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node

  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1281
1282
def _CheckOSVariant(os_obj, name):
  """Validates an OS name against the variants the OS supports.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if a variant is given for an OS without
      variants, if the variant is missing for an OS with variants, or if
      the variant is not among the supported ones

  """
  variant = objects.OS.GetVariant(name)

  if os_obj.supported_variants:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)

    if variant not in os_obj.supported_variants:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)

  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1305
1306
def _GetNodeInstancesInner(cfg, fn):
  """Returns the configured instances accepted by a filter function.

  @param cfg: the configuration object providing GetAllInstancesInfo
  @param fn: callable taking an instance; instances for which it returns
      a true value are selected
  @rtype: list
  @return: the selected instance objects

  """
  selected = []
  for instance in cfg.GetAllInstancesInfo().values():
    if fn(instance):
      selected.append(instance)
  return selected
1309
1310
def _GetNodeInstances(cfg, node_name):
  """Returns all instances having the node among their nodes.

  This covers both primary and secondary instances of the node.

  @param cfg: the configuration object
  @param node_name: the name of the node
  @return: list of instance objects

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1317
1318
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns the instances having the node as their primary node.

  @param cfg: the configuration object
  @param node_name: the name of the node
  @return: list of instance objects

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1325
1326
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns the instances having the node among their secondary nodes.

  @param cfg: the configuration object
  @param node_name: the name of the node
  @return: list of instance objects

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1333
1334
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the extra arguments needed for a storage type.

  @param cfg: the configuration object
  @param storage_type: the storage type to get the arguments for
  @rtype: list
  @return: an empty list, except for file storage, where the only element
      is the list of (plain and shared) file storage directories

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1345
1346
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's disks reported faulty by a node.

  @param cfg: the configuration object, used to set the disk IDs for the
      node before querying it
  @param rpc_runner: the RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed on to the RPC result's Raise method
  @rtype: list
  @return: the positions in the returned status list whose local disk
      status is L{constants.LDS_FAULTY}

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  status = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  status.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries without a status are skipped
  return [idx for (idx, bdev_status) in enumerate(status.payload)
          if bdev_status and
             bdev_status.ldisk_status == constants.LDS_FAULTY]
1362
1363
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @param lu: the LU whose opcode slots are checked (and possibly updated)
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both slots are set, or if neither is set
      and the cluster has no default iallocator

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      # Pass an error code here as well, like the other prerequisite errors
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
1394
1395
def _GetDefaultIAllocator(cfg, iallocator):
  """Picks the iallocator to use, falling back to the cluster default.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name
  @raise errors.OpPrereqError: if no iallocator was given and the cluster
      has no default one

  """
  # An unset/empty value from the opcode selects the cluster-wide default
  chosen = iallocator or cfg.GetDefaultIAllocator()

  if chosen:
    return chosen

  raise errors.OpPrereqError("No iallocator was specified, neither in the"
                             " opcode nor as a cluster-wide default",
                             errors.ECODE_INVAL)
1417
1418
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself performs no work; it only provides the hooks environment
  and nodes.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Builds the hooks environment, containing only the cluster name.

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Builds the hooks node lists.

    @return: a tuple of an empty list and a list holding the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Does nothing and reports success.

    """
    return True
1445
1446
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Builds the hooks environment, containing only the cluster name.

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Builds the hooks node lists, both of which are empty.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. the master is the only
    node left and there are no instances.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()

    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if not instancelist:
      return

    raise errors.OpPrereqError("There are still %d instance(s) in"
                               " this cluster." % len(instancelist),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    @return: the name of the master node

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    deactivation = self.rpc.call_node_deactivate_master_ip(master_name,
                                                           master_params,
                                                           use_external_script)
    deactivation.Raise("Could not disable the master role")

    return master_params.name
1504
1505
1506 def _VerifyCertificate(filename):
1507   """Verifies a certificate for L{LUClusterVerifyConfig}.
1508
1509   @type filename: string
1510   @param filename: Path to PEM file
1511
1512   """
1513   try:
1514     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1515                                            utils.ReadFile(filename))
1516   except Exception, err: # pylint: disable=W0703
1517     return (LUClusterVerifyConfig.ETYPE_ERROR,
1518             "Failed to load X509 certificate %s: %s" % (filename, err))
1519
1520   (errcode, msg) = \
1521     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1522                                 constants.SSL_CERT_EXPIRATION_ERROR)
1523
1524   if msg:
1525     fnamemsg = "While verifying %s: %s" % (filename, msg)
1526   else:
1527     fnamemsg = None
1528
1529   if errcode is None:
1530     return (None, fnamemsg)
1531   elif errcode == utils.CERT_WARNING:
1532     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1533   elif errcode == utils.CERT_ERROR:
1534     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1535
1536   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1537
1538
def _GetAllHypervisorParameters(cluster, instances):
  """Collects all hypervisor parameter sets in use.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  # Cluster-level defaults of every enabled hypervisor
  hvp_data = [("cluster", hv_name, cluster.GetHVDefaults(hv_name))
              for hv_name in cluster.enabled_hypervisors]

  # Per-OS overrides; empty override dictionaries are skipped
  for (os_name, os_hvp) in cluster.os_hvp.items():
    for (hv_name, hv_params) in os_hvp.items():
      if not hv_params:
        continue
      os_defaults = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, os_defaults))

  # TODO: collapse identical parameter values in a single one
  # Only instances with own hypervisor parameters are listed
  hvp_data.extend(("instance %s" % inst.name, inst.hypervisor,
                   cluster.FillHV(inst))
                  for inst in instances if inst.hvparams)

  return hvp_data
1569
1570
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  # Name of the keyword argument selecting the severity of a message
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Formats and reports an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: tuple of (item type, error text, unused third element)
    @param item: the item the message refers to, may be empty/None
    @param msg: the message, formatted with C{args} if any are given
    @keyword code: the severity, defaults to L{ETYPE_ERROR}

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (itype, etxt, _) = ecode

    # first complete the msg
    text = msg
    if args:
      text = text % args

    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      line = "%s:%s:%s:%s:%s" % (severity, etxt, itype, item, text)
    else:
      item_part = ""
      if item:
        item_part = " " + item
      line = "%s: %s%s: %s" % (severity, itype, item_part, text)

    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % line) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Reports an error message if the passed condition is True.

    The condition is also considered true if the opcode requests simulated
    errors. Unless the message ends up as a warning, the condition is
    merged into self.bad.

    """
    triggered = (bool(cond)
                 or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    etxt = ecode[1]
    if len(ecode) != 3:
      # Keep failing on malformed codes, as tuple unpacking would
      (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if triggered:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity == self.ETYPE_ERROR:
      self.bad = self.bad or triggered
1628
1629
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares that no locks are needed by this LU.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds the verification jobs and returns them for submission.

    If a group name was given, only that group is verified. Otherwise a
    job verifying the global configuration is created first, followed by
    one job per node group, each depending on the configuration job.

    @return: a L{ResultWithJobs} holding one single-opcode job per
        verification step

    """
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      config_op = \
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
      jobs.append([config_op])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    for group in groups:
      # depends_fn must be evaluated once per job, before the job is added:
      # its result depends on the number of jobs collected so far
      group_op = opcodes.OpClusterVerifyGroup(
        group_name=group,
        ignore_errors=self.op.ignore_errors,
        depends=depends_fn())
      jobs.append([group_op])

    # Fix up all parameters
    for job in jobs:
      for op in job:
        op.debug_simulate_errors = self.op.debug_simulate_errors
        op.verbose = self.op.verbose
        op.error_codes = self.op.error_codes
        try:
          op.skip_checks = self.op.skip_checks
        except AttributeError:
          # Only tolerated for opcodes other than the group verification
          assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1672
1673
1674 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1675   """Verifies the cluster config.
1676
1677   """
1678   REQ_BGL = True
1679
1680   def _VerifyHVP(self, hvp_data):
1681     """Verifies locally the syntax of the hypervisor parameters.
1682
1683     """
1684     for item, hv_name, hv_params in hvp_data:
1685       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1686              (item, hv_name))
1687       try:
1688         hv_class = hypervisor.GetHypervisor(hv_name)
1689         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1690         hv_class.CheckParameterSyntax(hv_params)
1691       except errors.GenericError, err:
1692         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1693
1694   def ExpandNames(self):
1695     # Information can be safely retrieved as the BGL is acquired in exclusive
1696     # mode
1697     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1698     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1699     self.all_node_info = self.cfg.GetAllNodesInfo()
1700     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1701     self.needed_locks = {}
1702
1703   def Exec(self, feedback_fn):
1704     """Verify integrity of cluster, performing various test on nodes.
1705
1706     """
1707     self.bad = False
1708     self._feedback_fn = feedback_fn
1709
1710     feedback_fn("* Verifying cluster config")
1711
1712     for msg in self.cfg.VerifyConfig():
1713       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1714
1715     feedback_fn("* Verifying cluster certificate files")
1716
1717     for cert_filename in constants.ALL_CERT_FILES:
1718       (errcode, msg) = _VerifyCertificate(cert_filename)
1719       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1720
1721     feedback_fn("* Verifying hypervisor parameters")
1722
1723     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1724                                                 self.all_inst_info.values()))
1725
1726     feedback_fn("* Verifying all nodes belong to an existing group")
1727
1728     # We do this verification here because, should this bogus circumstance
1729     # occur, it would never be caught by VerifyGroup, which only acts on
1730     # nodes/instances reachable from existing node groups.
1731
1732     dangling_nodes = set(node.name for node in self.all_node_info.values()
1733                          if node.group not in self.all_group_info)
1734
1735     dangling_instances = {}
1736     no_node_instances = []
1737
1738     for inst in self.all_inst_info.values():
1739       if inst.primary_node in dangling_nodes:
1740         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1741       elif inst.primary_node not in self.all_node_info:
1742         no_node_instances.append(inst.name)
1743
1744     pretty_dangling = [
1745         "%s (%s)" %
1746         (node.name,
1747          utils.CommaJoin(dangling_instances.get(node.name,
1748                                                 ["no instances"])))
1749         for node in dangling_nodes]
1750
1751     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1752                   None,
1753                   "the following nodes (and their instances) belong to a non"
1754                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1755
1756     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1757                   None,
1758                   "the following instances have a non-existing primary-node:"
1759                   " %s", utils.CommaJoin(no_node_instances))
1760
1761     return not self.bad
1762
1763
1764 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1765   """Verifies the status of a node group.
1766
1767   """
1768   HPATH = "cluster-verify"
1769   HTYPE = constants.HTYPE_CLUSTER
1770   REQ_BGL = False
1771
1772   _HOOKS_INDENT_RE = re.compile("^", re.M)
1773
1774   class NodeImage(object):
1775     """A class representing the logical and physical status of a node.
1776
1777     @type name: string
1778     @ivar name: the node name to which this object refers
1779     @ivar volumes: a structure as returned from
1780         L{ganeti.backend.GetVolumeList} (runtime)
1781     @ivar instances: a list of running instances (runtime)
1782     @ivar pinst: list of configured primary instances (config)
1783     @ivar sinst: list of configured secondary instances (config)
1784     @ivar sbp: dictionary of {primary-node: list of instances} for all
1785         instances for which this node is secondary (config)
1786     @ivar mfree: free memory, as reported by hypervisor (runtime)
1787     @ivar dfree: free disk, as reported by the node (runtime)
1788     @ivar offline: the offline status (config)
1789     @type rpc_fail: boolean
1790     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1791         not whether the individual keys were correct) (runtime)
1792     @type lvm_fail: boolean
1793     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1794     @type hyp_fail: boolean
1795     @ivar hyp_fail: whether the RPC call didn't return the instance list
1796     @type ghost: boolean
1797     @ivar ghost: whether this is a known node or not (config)
1798     @type os_fail: boolean
1799     @ivar os_fail: whether the RPC call didn't return valid OS data
1800     @type oslist: list
1801     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1802     @type vm_capable: boolean
1803     @ivar vm_capable: whether the node can host instances
1804
1805     """
1806     def __init__(self, offline=False, name=None, vm_capable=True):
1807       self.name = name
1808       self.volumes = {}
1809       self.instances = []
1810       self.pinst = []
1811       self.sinst = []
1812       self.sbp = {}
1813       self.mfree = 0
1814       self.dfree = 0
1815       self.offline = offline
1816       self.vm_capable = vm_capable
1817       self.rpc_fail = False
1818       self.lvm_fail = False
1819       self.hyp_fail = False
1820       self.ghost = False
1821       self.os_fail = False
1822       self.oslist = {}
1823
1824   def ExpandNames(self):
1825     # This raises errors.OpPrereqError on its own:
1826     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1827
1828     # Get instances in node group; this is unsafe and needs verification later
1829     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1830
1831     self.needed_locks = {
1832       locking.LEVEL_INSTANCE: inst_names,
1833       locking.LEVEL_NODEGROUP: [self.group_uuid],
1834       locking.LEVEL_NODE: [],
1835       }
1836
1837     self.share_locks = _ShareAll()
1838
1839   def DeclareLocks(self, level):
1840     if level == locking.LEVEL_NODE:
1841       # Get members of node group; this is unsafe and needs verification later
1842       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1843
1844       all_inst_info = self.cfg.GetAllInstancesInfo()
1845
1846       # In Exec(), we warn about mirrored instances that have primary and
1847       # secondary living in separate node groups. To fully verify that
1848       # volumes for these instances are healthy, we will need to do an
1849       # extra call to their secondaries. We ensure here those nodes will
1850       # be locked.
1851       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1852         # Important: access only the instances whose lock is owned
1853         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1854           nodes.update(all_inst_info[inst].secondary_nodes)
1855
1856       self.needed_locks[locking.LEVEL_NODE] = nodes
1857
1858   def CheckPrereq(self):
1859     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1860     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1861
1862     group_nodes = set(self.group_info.members)
1863     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1864
1865     unlocked_nodes = \
1866         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1867
1868     unlocked_instances = \
1869         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1870
1871     if unlocked_nodes:
1872       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1873                                  utils.CommaJoin(unlocked_nodes))
1874
1875     if unlocked_instances:
1876       raise errors.OpPrereqError("Missing lock for instances: %s" %
1877                                  utils.CommaJoin(unlocked_instances))
1878
1879     self.all_node_info = self.cfg.GetAllNodesInfo()
1880     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1881
1882     self.my_node_names = utils.NiceSort(group_nodes)
1883     self.my_inst_names = utils.NiceSort(group_instances)
1884
1885     self.my_node_info = dict((name, self.all_node_info[name])
1886                              for name in self.my_node_names)
1887
1888     self.my_inst_info = dict((name, self.all_inst_info[name])
1889                              for name in self.my_inst_names)
1890
1891     # We detect here the nodes that will need the extra RPC calls for verifying
1892     # split LV volumes; they should be locked.
1893     extra_lv_nodes = set()
1894
1895     for inst in self.my_inst_info.values():
1896       if inst.disk_template in constants.DTS_INT_MIRROR:
1897         group = self.my_node_info[inst.primary_node].group
1898         for nname in inst.secondary_nodes:
1899           if self.all_node_info[nname].group != group:
1900             extra_lv_nodes.add(nname)
1901
1902     unlocked_lv_nodes = \
1903         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1904
1905     if unlocked_lv_nodes:
1906       raise errors.OpPrereqError("these nodes could be locked: %s" %
1907                                  utils.CommaJoin(unlocked_lv_nodes))
1908     self.extra_lv_nodes = list(extra_lv_nodes)
1909
1910   def _VerifyNode(self, ninfo, nresult):
1911     """Perform some basic validation on data returned from a node.
1912
1913       - check the result data structure is well formed and has all the
1914         mandatory fields
1915       - check ganeti version
1916
1917     @type ninfo: L{objects.Node}
1918     @param ninfo: the node to check
1919     @param nresult: the results from the node
1920     @rtype: boolean
1921     @return: whether overall this call was successful (and we can expect
1922          reasonable values in the respose)
1923
1924     """
1925     node = ninfo.name
1926     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1927
1928     # main result, nresult should be a non-empty dict
1929     test = not nresult or not isinstance(nresult, dict)
1930     _ErrorIf(test, constants.CV_ENODERPC, node,
1931                   "unable to verify node: no data returned")
1932     if test:
1933       return False
1934
1935     # compares ganeti version
1936     local_version = constants.PROTOCOL_VERSION
1937     remote_version = nresult.get("version", None)
1938     test = not (remote_version and
1939                 isinstance(remote_version, (list, tuple)) and
1940                 len(remote_version) == 2)
1941     _ErrorIf(test, constants.CV_ENODERPC, node,
1942              "connection to node returned invalid data")
1943     if test:
1944       return False
1945
1946     test = local_version != remote_version[0]
1947     _ErrorIf(test, constants.CV_ENODEVERSION, node,
1948              "incompatible protocol versions: master %s,"
1949              " node %s", local_version, remote_version[0])
1950     if test:
1951       return False
1952
1953     # node seems compatible, we can actually try to look into its results
1954
1955     # full package version
1956     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1957                   constants.CV_ENODEVERSION, node,
1958                   "software version mismatch: master %s, node %s",
1959                   constants.RELEASE_VERSION, remote_version[1],
1960                   code=self.ETYPE_WARNING)
1961
1962     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1963     if ninfo.vm_capable and isinstance(hyp_result, dict):
1964       for hv_name, hv_result in hyp_result.iteritems():
1965         test = hv_result is not None
1966         _ErrorIf(test, constants.CV_ENODEHV, node,
1967                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1968
1969     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1970     if ninfo.vm_capable and isinstance(hvp_result, list):
1971       for item, hv_name, hv_result in hvp_result:
1972         _ErrorIf(True, constants.CV_ENODEHV, node,
1973                  "hypervisor %s parameter verify failure (source %s): %s",
1974                  hv_name, item, hv_result)
1975
1976     test = nresult.get(constants.NV_NODESETUP,
1977                        ["Missing NODESETUP results"])
1978     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1979              "; ".join(test))
1980
1981     return True
1982
1983   def _VerifyNodeTime(self, ninfo, nresult,
1984                       nvinfo_starttime, nvinfo_endtime):
1985     """Check the node time.
1986
1987     @type ninfo: L{objects.Node}
1988     @param ninfo: the node to check
1989     @param nresult: the remote results for the node
1990     @param nvinfo_starttime: the start time of the RPC call
1991     @param nvinfo_endtime: the end time of the RPC call
1992
1993     """
1994     node = ninfo.name
1995     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1996
1997     ntime = nresult.get(constants.NV_TIME, None)
1998     try:
1999       ntime_merged = utils.MergeTime(ntime)
2000     except (ValueError, TypeError):
2001       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2002       return
2003
2004     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2005       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2006     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2007       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2008     else:
2009       ntime_diff = None
2010
2011     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2012              "Node time diverges by at least %s from master node time",
2013              ntime_diff)
2014
2015   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2016     """Check the node LVM results.
2017
2018     @type ninfo: L{objects.Node}
2019     @param ninfo: the node to check
2020     @param nresult: the remote results for the node
2021     @param vg_name: the configured VG name
2022
2023     """
2024     if vg_name is None:
2025       return
2026
2027     node = ninfo.name
2028     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2029
2030     # checks vg existence and size > 20G
2031     vglist = nresult.get(constants.NV_VGLIST, None)
2032     test = not vglist
2033     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2034     if not test:
2035       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2036                                             constants.MIN_VG_SIZE)
2037       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2038
2039     # check pv names
2040     pvlist = nresult.get(constants.NV_PVLIST, None)
2041     test = pvlist is None
2042     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2043     if not test:
2044       # check that ':' is not present in PV names, since it's a
2045       # special character for lvcreate (denotes the range of PEs to
2046       # use on the PV)
2047       for _, pvname, owner_vg in pvlist:
2048         test = ":" in pvname
2049         _ErrorIf(test, constants.CV_ENODELVM, node,
2050                  "Invalid character ':' in PV '%s' of VG '%s'",
2051                  pvname, owner_vg)
2052
2053   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2054     """Check the node bridges.
2055
2056     @type ninfo: L{objects.Node}
2057     @param ninfo: the node to check
2058     @param nresult: the remote results for the node
2059     @param bridges: the expected list of bridges
2060
2061     """
2062     if not bridges:
2063       return
2064
2065     node = ninfo.name
2066     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2067
2068     missing = nresult.get(constants.NV_BRIDGES, None)
2069     test = not isinstance(missing, list)
2070     _ErrorIf(test, constants.CV_ENODENET, node,
2071              "did not return valid bridge information")
2072     if not test:
2073       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2074                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2075
2076   def _VerifyNodeUserScripts(self, ninfo, nresult):
2077     """Check the results of user scripts presence and executability on the node
2078
2079     @type ninfo: L{objects.Node}
2080     @param ninfo: the node to check
2081     @param nresult: the remote results for the node
2082
2083     """
2084     node = ninfo.name
2085
2086     test = not constants.NV_USERSCRIPTS in nresult
2087     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2088                   "did not return user scripts information")
2089
2090     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2091     if not test:
2092       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2093                     "user scripts not present or not executable: %s" %
2094                     utils.CommaJoin(sorted(broken_scripts)))
2095
2096   def _VerifyNodeNetwork(self, ninfo, nresult):
2097     """Check the node network connectivity results.
2098
2099     @type ninfo: L{objects.Node}
2100     @param ninfo: the node to check
2101     @param nresult: the remote results for the node
2102
2103     """
2104     node = ninfo.name
2105     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2106
2107     test = constants.NV_NODELIST not in nresult
2108     _ErrorIf(test, constants.CV_ENODESSH, node,
2109              "node hasn't returned node ssh connectivity data")
2110     if not test:
2111       if nresult[constants.NV_NODELIST]:
2112         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2113           _ErrorIf(True, constants.CV_ENODESSH, node,
2114                    "ssh communication with node '%s': %s", a_node, a_msg)
2115
2116     test = constants.NV_NODENETTEST not in nresult
2117     _ErrorIf(test, constants.CV_ENODENET, node,
2118              "node hasn't returned node tcp connectivity data")
2119     if not test:
2120       if nresult[constants.NV_NODENETTEST]:
2121         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2122         for anode in nlist:
2123           _ErrorIf(True, constants.CV_ENODENET, node,
2124                    "tcp communication with node '%s': %s",
2125                    anode, nresult[constants.NV_NODENETTEST][anode])
2126
2127     test = constants.NV_MASTERIP not in nresult
2128     _ErrorIf(test, constants.CV_ENODENET, node,
2129              "node hasn't returned node master IP reachability data")
2130     if not test:
2131       if not nresult[constants.NV_MASTERIP]:
2132         if node == self.master_node:
2133           msg = "the master node cannot reach the master IP (not configured?)"
2134         else:
2135           msg = "cannot reach the master IP"
2136         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2137
2138   def _VerifyInstancePolicy(self, instance):
2139     """Verify instance specs against instance policy set on node group level.
2140
2141
2142     """
2143     cluster = self.cfg.GetClusterInfo()
2144     full_beparams = cluster.FillBE(instance)
2145     ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2146
2147     mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2148     cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2149     disk_count = len(instance.disks)
2150     disk_sizes = [disk.size for disk in instance.disks]
2151     nic_count = len(instance.nics)
2152
2153     test_settings = [
2154       (constants.ISPEC_MEM_SIZE, mem_size),
2155       (constants.ISPEC_CPU_COUNT, cpu_count),
2156       (constants.ISPEC_DISK_COUNT, disk_count),
2157       (constants.ISPEC_NIC_COUNT, nic_count),
2158       ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
2159
2160     for (name, value) in test_settings:
2161       test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2162       self._ErrorIf(test_result is not None,
2163                     constants.CV_EINSTANCEPOLICY, instance.name,
2164                     test_result)
2165
2166   def _VerifyInstance(self, instance, instanceconfig, node_image,
2167                       diskstatus):
2168     """Verify an instance.
2169
2170     This function checks to see if the required block devices are
2171     available on the instance's node.
2172
2173     """
2174     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2175     node_current = instanceconfig.primary_node
2176
2177     node_vol_should = {}
2178     instanceconfig.MapLVsByNode(node_vol_should)
2179
2180     self._VerifyInstancePolicy(instanceconfig)
2181
2182     for node in node_vol_should:
2183       n_img = node_image[node]
2184       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2185         # ignore missing volumes on offline or broken nodes
2186         continue
2187       for volume in node_vol_should[node]:
2188         test = volume not in n_img.volumes
2189         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2190                  "volume %s missing on node %s", volume, node)
2191
2192     if instanceconfig.admin_state == constants.ADMINST_UP:
2193       pri_img = node_image[node_current]
2194       test = instance not in pri_img.instances and not pri_img.offline
2195       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2196                "instance not running on its primary node %s",
2197                node_current)
2198
2199     diskdata = [(nname, success, status, idx)
2200                 for (nname, disks) in diskstatus.items()
2201                 for idx, (success, status) in enumerate(disks)]
2202
2203     for nname, success, bdev_status, idx in diskdata:
2204       # the 'ghost node' construction in Exec() ensures that we have a
2205       # node here
2206       snode = node_image[nname]
2207       bad_snode = snode.ghost or snode.offline
2208       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2209                not success and not bad_snode,
2210                constants.CV_EINSTANCEFAULTYDISK, instance,
2211                "couldn't retrieve status for disk/%s on %s: %s",
2212                idx, nname, bdev_status)
2213       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2214                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2215                constants.CV_EINSTANCEFAULTYDISK, instance,
2216                "disk/%s on %s is faulty", idx, nname)
2217
2218   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2219     """Verify if there are any unknown volumes in the cluster.
2220
2221     The .os, .swap and backup volumes are ignored. All other volumes are
2222     reported as unknown.
2223
2224     @type reserved: L{ganeti.utils.FieldSet}
2225     @param reserved: a FieldSet of reserved volume names
2226
2227     """
2228     for node, n_img in node_image.items():
2229       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2230         # skip non-healthy nodes
2231         continue
2232       for volume in n_img.volumes:
2233         test = ((node not in node_vol_should or
2234                 volume not in node_vol_should[node]) and
2235                 not reserved.Matches(volume))
2236         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2237                       "volume %s is unknown", volume)
2238
2239   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2240     """Verify N+1 Memory Resilience.
2241
2242     Check that if one single node dies we can still start all the
2243     instances it was primary for.
2244
2245     """
2246     cluster_info = self.cfg.GetClusterInfo()
2247     for node, n_img in node_image.items():
2248       # This code checks that every node which is now listed as
2249       # secondary has enough memory to host all instances it is
2250       # supposed to should a single other node in the cluster fail.
2251       # FIXME: not ready for failover to an arbitrary node
2252       # FIXME: does not support file-backed instances
2253       # WARNING: we currently take into account down instances as well
2254       # as up ones, considering that even if they're down someone
2255       # might want to start them even in the event of a node failure.
2256       if n_img.offline:
2257         # we're skipping offline nodes from the N+1 warning, since
2258         # most likely we don't have good memory infromation from them;
2259         # we already list instances living on such nodes, and that's
2260         # enough warning
2261         continue
2262       #TODO(dynmem): use MINMEM for checking
2263       #TODO(dynmem): also consider ballooning out other instances
2264       for prinode, instances in n_img.sbp.items():
2265         needed_mem = 0
2266         for instance in instances:
2267           bep = cluster_info.FillBE(instance_cfg[instance])
2268           if bep[constants.BE_AUTO_BALANCE]:
2269             needed_mem += bep[constants.BE_MAXMEM]
2270         test = n_img.mfree < needed_mem
2271         self._ErrorIf(test, constants.CV_ENODEN1, node,
2272                       "not enough memory to accomodate instance failovers"
2273                       " should node %s fail (%dMiB needed, %dMiB available)",
2274                       prinode, needed_mem, n_img.mfree)
2275
2276   @classmethod
2277   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2278                    (files_all, files_opt, files_mc, files_vm)):
2279     """Verifies file checksums collected from all nodes.
2280
2281     @param errorif: Callback for reporting errors
2282     @param nodeinfo: List of L{objects.Node} objects
2283     @param master_node: Name of master node
2284     @param all_nvinfo: RPC results
2285
2286     """
2287     # Define functions determining which nodes to consider for a file
2288     files2nodefn = [
2289       (files_all, None),
2290       (files_mc, lambda node: (node.master_candidate or
2291                                node.name == master_node)),
2292       (files_vm, lambda node: node.vm_capable),
2293       ]
2294
2295     # Build mapping from filename to list of nodes which should have the file
2296     nodefiles = {}
2297     for (files, fn) in files2nodefn:
2298       if fn is None:
2299         filenodes = nodeinfo
2300       else:
2301         filenodes = filter(fn, nodeinfo)
2302       nodefiles.update((filename,
2303                         frozenset(map(operator.attrgetter("name"), filenodes)))
2304                        for filename in files)
2305
2306     assert set(nodefiles) == (files_all | files_mc | files_vm)
2307
2308     fileinfo = dict((filename, {}) for filename in nodefiles)
2309     ignore_nodes = set()
2310
2311     for node in nodeinfo:
2312       if node.offline:
2313         ignore_nodes.add(node.name)
2314         continue
2315
2316       nresult = all_nvinfo[node.name]
2317
2318       if nresult.fail_msg or not nresult.payload:
2319         node_files = None
2320       else:
2321         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2322
2323       test = not (node_files and isinstance(node_files, dict))
2324       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2325               "Node did not return file checksum data")
2326       if test:
2327         ignore_nodes.add(node.name)
2328         continue
2329
2330       # Build per-checksum mapping from filename to nodes having it
2331       for (filename, checksum) in node_files.items():
2332         assert filename in nodefiles
2333         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2334
2335     for (filename, checksums) in fileinfo.items():
2336       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2337
2338       # Nodes having the file
2339       with_file = frozenset(node_name
2340                             for nodes in fileinfo[filename].values()
2341                             for node_name in nodes) - ignore_nodes
2342
2343       expected_nodes = nodefiles[filename] - ignore_nodes
2344
2345       # Nodes missing file
2346       missing_file = expected_nodes - with_file
2347
2348       if filename in files_opt:
2349         # All or no nodes
2350         errorif(missing_file and missing_file != expected_nodes,
2351                 constants.CV_ECLUSTERFILECHECK, None,
2352                 "File %s is optional, but it must exist on all or no"
2353                 " nodes (not found on %s)",
2354                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2355       else:
2356         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2357                 "File %s is missing from node(s) %s", filename,
2358                 utils.CommaJoin(utils.NiceSort(missing_file)))
2359
2360         # Warn if a node has a file it shouldn't
2361         unexpected = with_file - expected_nodes
2362         errorif(unexpected,
2363                 constants.CV_ECLUSTERFILECHECK, None,
2364                 "File %s should not exist on node(s) %s",
2365                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2366
2367       # See if there are multiple versions of the file
2368       test = len(checksums) > 1
2369       if test:
2370         variants = ["variant %s on %s" %
2371                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2372                     for (idx, (checksum, nodes)) in
2373                       enumerate(sorted(checksums.items()))]
2374       else:
2375         variants = []
2376
2377       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2378               "File %s found with %s different checksums (%s)",
2379               filename, len(checksums), "; ".join(variants))
2380
2381   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2382                       drbd_map):
2383     """Verifies and the node DRBD status.
2384
2385     @type ninfo: L{objects.Node}
2386     @param ninfo: the node to check
2387     @param nresult: the remote results for the node
2388     @param instanceinfo: the dict of instances
2389     @param drbd_helper: the configured DRBD usermode helper
2390     @param drbd_map: the DRBD map as returned by
2391         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2392
2393     """
2394     node = ninfo.name
2395     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2396
2397     if drbd_helper:
2398       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2399       test = (helper_result == None)
2400       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2401                "no drbd usermode helper returned")
2402       if helper_result:
2403         status, payload = helper_result
2404         test = not status
2405         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2406                  "drbd usermode helper check unsuccessful: %s", payload)
2407         test = status and (payload != drbd_helper)
2408         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2409                  "wrong drbd usermode helper: %s", payload)
2410
2411     # compute the DRBD minors
2412     node_drbd = {}
2413     for minor, instance in drbd_map[node].items():
2414       test = instance not in instanceinfo
2415       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2416                "ghost instance '%s' in temporary DRBD map", instance)
2417         # ghost instance should not be running, but otherwise we
2418         # don't give double warnings (both ghost instance and
2419         # unallocated minor in use)
2420       if test:
2421         node_drbd[minor] = (instance, False)
2422       else:
2423         instance = instanceinfo[instance]
2424         node_drbd[minor] = (instance.name,
2425                             instance.admin_state == constants.ADMINST_UP)
2426
2427     # and now check them
2428     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2429     test = not isinstance(used_minors, (tuple, list))
2430     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2431              "cannot parse drbd status file: %s", str(used_minors))
2432     if test:
2433       # we cannot check drbd status
2434       return
2435
2436     for minor, (iname, must_exist) in node_drbd.items():
2437       test = minor not in used_minors and must_exist
2438       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2439                "drbd minor %d of instance %s is not active", minor, iname)
2440     for minor in used_minors:
2441       test = minor not in node_drbd
2442       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2443                "unallocated drbd minor %d is in use", minor)
2444
2445   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2446     """Builds the node OS structures.
2447
2448     @type ninfo: L{objects.Node}
2449     @param ninfo: the node to check
2450     @param nresult: the remote results for the node
2451     @param nimg: the node image object
2452
2453     """
2454     node = ninfo.name
2455     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2456
2457     remote_os = nresult.get(constants.NV_OSLIST, None)
2458     test = (not isinstance(remote_os, list) or
2459             not compat.all(isinstance(v, list) and len(v) == 7
2460                            for v in remote_os))
2461
2462     _ErrorIf(test, constants.CV_ENODEOS, node,
2463              "node hasn't returned valid OS data")
2464
2465     nimg.os_fail = test
2466
2467     if test:
2468       return
2469
2470     os_dict = {}
2471
2472     for (name, os_path, status, diagnose,
2473          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2474
2475       if name not in os_dict:
2476         os_dict[name] = []
2477
2478       # parameters is a list of lists instead of list of tuples due to
2479       # JSON lacking a real tuple type, fix it:
2480       parameters = [tuple(v) for v in parameters]
2481       os_dict[name].append((os_path, status, diagnose,
2482                             set(variants), set(parameters), set(api_ver)))
2483
2484     nimg.oslist = os_dict
2485
2486   def _VerifyNodeOS(self, ninfo, nimg, base):
2487     """Verifies the node OS list.
2488
2489     @type ninfo: L{objects.Node}
2490     @param ninfo: the node to check
2491     @param nimg: the node image object
2492     @param base: the 'template' node we match against (e.g. from the master)
2493
2494     """
2495     node = ninfo.name
2496     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2497
2498     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2499
2500     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2501     for os_name, os_data in nimg.oslist.items():
2502       assert os_data, "Empty OS status for OS %s?!" % os_name
2503       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2504       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2505                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2506       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2507                "OS '%s' has multiple entries (first one shadows the rest): %s",
2508                os_name, utils.CommaJoin([v[0] for v in os_data]))
2509       # comparisons with the 'base' image
2510       test = os_name not in base.oslist
2511       _ErrorIf(test, constants.CV_ENODEOS, node,
2512                "Extra OS %s not present on reference node (%s)",
2513                os_name, base.name)
2514       if test:
2515         continue
2516       assert base.oslist[os_name], "Base node has empty OS status?"
2517       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2518       if not b_status:
2519         # base OS is invalid, skipping
2520         continue
2521       for kind, a, b in [("API version", f_api, b_api),
2522                          ("variants list", f_var, b_var),
2523                          ("parameters", beautify_params(f_param),
2524                           beautify_params(b_param))]:
2525         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2526                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2527                  kind, os_name, base.name,
2528                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2529
2530     # check any missing OSes
2531     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2532     _ErrorIf(missing, constants.CV_ENODEOS, node,
2533              "OSes present on reference node %s but missing on this node: %s",
2534              base.name, utils.CommaJoin(missing))
2535
2536   def _VerifyOob(self, ninfo, nresult):
2537     """Verifies out of band functionality of a node.
2538
2539     @type ninfo: L{objects.Node}
2540     @param ninfo: the node to check
2541     @param nresult: the remote results for the node
2542
2543     """
2544     node = ninfo.name
2545     # We just have to verify the paths on master and/or master candidates
2546     # as the oob helper is invoked on the master
2547     if ((ninfo.master_candidate or ninfo.master_capable) and
2548         constants.NV_OOB_PATHS in nresult):
2549       for path_result in nresult[constants.NV_OOB_PATHS]:
2550         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2551
2552   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2553     """Verifies and updates the node volume data.
2554
2555     This function will update a L{NodeImage}'s internal structures
2556     with data from the remote call.
2557
2558     @type ninfo: L{objects.Node}
2559     @param ninfo: the node to check
2560     @param nresult: the remote results for the node
2561     @param nimg: the node image object
2562     @param vg_name: the configured VG name
2563
2564     """
2565     node = ninfo.name
2566     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2567
2568     nimg.lvm_fail = True
2569     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2570     if vg_name is None:
2571       pass
2572     elif isinstance(lvdata, basestring):
2573       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2574                utils.SafeEncode(lvdata))
2575     elif not isinstance(lvdata, dict):
2576       _ErrorIf(True, constants.CV_ENODELVM, node,
2577                "rpc call to node failed (lvlist)")
2578     else:
2579       nimg.volumes = lvdata
2580       nimg.lvm_fail = False
2581
2582   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2583     """Verifies and updates the node instance list.
2584
2585     If the listing was successful, then updates this node's instance
2586     list. Otherwise, it marks the RPC call as failed for the instance
2587     list key.
2588
2589     @type ninfo: L{objects.Node}
2590     @param ninfo: the node to check
2591     @param nresult: the remote results for the node
2592     @param nimg: the node image object
2593
2594     """
2595     idata = nresult.get(constants.NV_INSTANCELIST, None)
2596     test = not isinstance(idata, list)
2597     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2598                   "rpc call to node failed (instancelist): %s",
2599                   utils.SafeEncode(str(idata)))
2600     if test:
2601       nimg.hyp_fail = True
2602     else:
2603       nimg.instances = idata
2604
2605   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2606     """Verifies and computes a node information map
2607
2608     @type ninfo: L{objects.Node}
2609     @param ninfo: the node to check
2610     @param nresult: the remote results for the node
2611     @param nimg: the node image object
2612     @param vg_name: the configured VG name
2613
2614     """
2615     node = ninfo.name
2616     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2617
2618     # try to read free memory (from the hypervisor)
2619     hv_info = nresult.get(constants.NV_HVINFO, None)
2620     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2621     _ErrorIf(test, constants.CV_ENODEHV, node,
2622              "rpc call to node failed (hvinfo)")
2623     if not test:
2624       try:
2625         nimg.mfree = int(hv_info["memory_free"])
2626       except (ValueError, TypeError):
2627         _ErrorIf(True, constants.CV_ENODERPC, node,
2628                  "node returned invalid nodeinfo, check hypervisor")
2629
2630     # FIXME: devise a free space model for file based instances as well
2631     if vg_name is not None:
2632       test = (constants.NV_VGLIST not in nresult or
2633               vg_name not in nresult[constants.NV_VGLIST])
2634       _ErrorIf(test, constants.CV_ENODELVM, node,
2635                "node didn't return data for the volume group '%s'"
2636                " - it is either missing or broken", vg_name)
2637       if not test:
2638         try:
2639           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2640         except (ValueError, TypeError):
2641           _ErrorIf(True, constants.CV_ENODERPC, node,
2642                    "node returned invalid LVM info, check LVM status")
2643
2644   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2645     """Gets per-disk status information for all instances.
2646
2647     @type nodelist: list of strings
2648     @param nodelist: Node names
2649     @type node_image: dict of (name, L{objects.Node})
2650     @param node_image: Node objects
2651     @type instanceinfo: dict of (name, L{objects.Instance})
2652     @param instanceinfo: Instance objects
2653     @rtype: {instance: {node: [(succes, payload)]}}
2654     @return: a dictionary of per-instance dictionaries with nodes as
2655         keys and disk information as values; the disk information is a
2656         list of tuples (success, payload)
2657
2658     """
2659     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2660
2661     node_disks = {}
2662     node_disks_devonly = {}
2663     diskless_instances = set()
2664     diskless = constants.DT_DISKLESS
2665
2666     for nname in nodelist:
2667       node_instances = list(itertools.chain(node_image[nname].pinst,
2668                                             node_image[nname].sinst))
2669       diskless_instances.update(inst for inst in node_instances
2670                                 if instanceinfo[inst].disk_template == diskless)
2671       disks = [(inst, disk)
2672                for inst in node_instances
2673                for disk in instanceinfo[inst].disks]
2674
2675       if not disks:
2676         # No need to collect data
2677         continue
2678
2679       node_disks[nname] = disks
2680
2681       # Creating copies as SetDiskID below will modify the objects and that can
2682       # lead to incorrect data returned from nodes
2683       devonly = [dev.Copy() for (_, dev) in disks]
2684
2685       for dev in devonly:
2686         self.cfg.SetDiskID(dev, nname)
2687
2688       node_disks_devonly[nname] = devonly
2689
2690     assert len(node_disks) == len(node_disks_devonly)
2691
2692     # Collect data from all nodes with disks
2693     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2694                                                           node_disks_devonly)
2695
2696     assert len(result) == len(node_disks)
2697
2698     instdisk = {}
2699
2700     for (nname, nres) in result.items():
2701       disks = node_disks[nname]
2702
2703       if nres.offline:
2704         # No data from this node
2705         data = len(disks) * [(False, "node offline")]
2706       else:
2707         msg = nres.fail_msg
2708         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2709                  "while getting disk information: %s", msg)
2710         if msg:
2711           # No data from this node
2712           data = len(disks) * [(False, msg)]
2713         else:
2714           data = []
2715           for idx, i in enumerate(nres.payload):
2716             if isinstance(i, (tuple, list)) and len(i) == 2:
2717               data.append(i)
2718             else:
2719               logging.warning("Invalid result from node %s, entry %d: %s",
2720                               nname, idx, i)
2721               data.append((False, "Invalid result from the remote node"))
2722
2723       for ((inst, _), status) in zip(disks, data):
2724         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2725
2726     # Add empty entries for diskless instances.
2727     for inst in diskless_instances:
2728       assert inst not in instdisk
2729       instdisk[inst] = {}
2730
2731     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2732                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2733                       compat.all(isinstance(s, (tuple, list)) and
2734                                  len(s) == 2 for s in statuses)
2735                       for inst, nnames in instdisk.items()
2736                       for nname, statuses in nnames.items())
2737     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2738
2739     return instdisk
2740
2741   @staticmethod
2742   def _SshNodeSelector(group_uuid, all_nodes):
2743     """Create endless iterators for all potential SSH check hosts.
2744
2745     """
2746     nodes = [node for node in all_nodes
2747              if (node.group != group_uuid and
2748                  not node.offline)]
2749     keyfunc = operator.attrgetter("group")
2750
2751     return map(itertools.cycle,
2752                [sorted(map(operator.attrgetter("name"), names))
2753                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2754                                                   keyfunc)])
2755
2756   @classmethod
2757   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2758     """Choose which nodes should talk to which other nodes.
2759
2760     We will make nodes contact all nodes in their group, and one node from
2761     every other group.
2762
2763     @warning: This algorithm has a known issue if one node group is much
2764       smaller than others (e.g. just one node). In such a case all other
2765       nodes will talk to the single node.
2766
2767     """
2768     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2769     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2770
2771     return (online_nodes,
2772             dict((name, sorted([i.next() for i in sel]))
2773                  for name in online_nodes))
2774
2775   def BuildHooksEnv(self):
2776     """Build hooks env.
2777
2778     Cluster-Verify hooks just ran in the post phase and their failure makes
2779     the output be logged in the verify output and the verification to fail.
2780
2781     """
2782     env = {
2783       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2784       }
2785
2786     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2787                for node in self.my_node_info.values())
2788
2789     return env
2790
2791   def BuildHooksNodes(self):
2792     """Build hooks nodes.
2793
2794     """
2795     return ([], self.my_node_names)
2796
2797   def Exec(self, feedback_fn):
2798     """Verify integrity of the node group, performing various test on nodes.
2799
2800     """
2801     # This method has too many local variables. pylint: disable=R0914
2802     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2803
2804     if not self.my_node_names:
2805       # empty node group
2806       feedback_fn("* Empty node group, skipping verification")
2807       return True
2808
2809     self.bad = False
2810     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2811     verbose = self.op.verbose
2812     self._feedback_fn = feedback_fn
2813
2814     vg_name = self.cfg.GetVGName()
2815     drbd_helper = self.cfg.GetDRBDHelper()
2816     cluster = self.cfg.GetClusterInfo()
2817     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2818     hypervisors = cluster.enabled_hypervisors
2819     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2820
2821     i_non_redundant = [] # Non redundant instances
2822     i_non_a_balanced = [] # Non auto-balanced instances
2823     i_offline = 0 # Count of offline instances
2824     n_offline = 0 # Count of offline nodes
2825     n_drained = 0 # Count of nodes being drained
2826     node_vol_should = {}
2827
2828     # FIXME: verify OS list
2829
2830     # File verification
2831     filemap = _ComputeAncillaryFiles(cluster, False)
2832
2833     # do local checksums
2834     master_node = self.master_node = self.cfg.GetMasterNode()
2835     master_ip = self.cfg.GetMasterIP()
2836
2837     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2838
2839     user_scripts = []
2840     if self.cfg.GetUseExternalMipScript():
2841       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2842
2843     node_verify_param = {
2844       constants.NV_FILELIST:
2845         utils.UniqueSequence(filename
2846                              for files in filemap
2847                              for filename in files),
2848       constants.NV_NODELIST:
2849         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2850                                   self.all_node_info.values()),
2851       constants.NV_HYPERVISOR: hypervisors,
2852       constants.NV_HVPARAMS:
2853         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2854       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2855                                  for node in node_data_list
2856                                  if not node.offline],
2857       constants.NV_INSTANCELIST: hypervisors,
2858       constants.NV_VERSION: None,
2859       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2860       constants.NV_NODESETUP: None,
2861       constants.NV_TIME: None,
2862       constants.NV_MASTERIP: (master_node, master_ip),
2863       constants.NV_OSLIST: None,
2864       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2865       constants.NV_USERSCRIPTS: user_scripts,
2866       }
2867
2868     if vg_name is not None:
2869       node_verify_param[constants.NV_VGLIST] = None
2870       node_verify_param[constants.NV_LVLIST] = vg_name
2871       node_verify_param[constants.NV_PVLIST] = [vg_name]
2872       node_verify_param[constants.NV_DRBDLIST] = None
2873
2874     if drbd_helper:
2875       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2876
2877     # bridge checks
2878     # FIXME: this needs to be changed per node-group, not cluster-wide
2879     bridges = set()
2880     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2881     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2882       bridges.add(default_nicpp[constants.NIC_LINK])
2883     for instance in self.my_inst_info.values():
2884       for nic in instance.nics:
2885         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2886         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2887           bridges.add(full_nic[constants.NIC_LINK])
2888
2889     if bridges:
2890       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2891
2892     # Build our expected cluster state
2893     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2894                                                  name=node.name,
2895                                                  vm_capable=node.vm_capable))
2896                       for node in node_data_list)
2897
2898     # Gather OOB paths
2899     oob_paths = []
2900     for node in self.all_node_info.values():
2901       path = _SupportsOob(self.cfg, node)
2902       if path and path not in oob_paths:
2903         oob_paths.append(path)
2904
2905     if oob_paths:
2906       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2907
2908     for instance in self.my_inst_names:
2909       inst_config = self.my_inst_info[instance]
2910
2911       for nname in inst_config.all_nodes:
2912         if nname not in node_image:
2913           gnode = self.NodeImage(name=nname)
2914           gnode.ghost = (nname not in self.all_node_info)
2915           node_image[nname] = gnode
2916
2917       inst_config.MapLVsByNode(node_vol_should)
2918
2919       pnode = inst_config.primary_node
2920       node_image[pnode].pinst.append(instance)
2921
2922       for snode in inst_config.secondary_nodes:
2923         nimg = node_image[snode]
2924         nimg.sinst.append(instance)
2925         if pnode not in nimg.sbp:
2926           nimg.sbp[pnode] = []
2927         nimg.sbp[pnode].append(instance)
2928
2929     # At this point, we have the in-memory data structures complete,
2930     # except for the runtime information, which we'll gather next
2931
2932     # Due to the way our RPC system works, exact response times cannot be
2933     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2934     # time before and after executing the request, we can at least have a time
2935     # window.
2936     nvinfo_starttime = time.time()
2937     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2938                                            node_verify_param,
2939                                            self.cfg.GetClusterName())
2940     nvinfo_endtime = time.time()
2941
2942     if self.extra_lv_nodes and vg_name is not None:
2943       extra_lv_nvinfo = \
2944           self.rpc.call_node_verify(self.extra_lv_nodes,
2945                                     {constants.NV_LVLIST: vg_name},
2946                                     self.cfg.GetClusterName())
2947     else:
2948       extra_lv_nvinfo = {}
2949
2950     all_drbd_map = self.cfg.ComputeDRBDMap()
2951
2952     feedback_fn("* Gathering disk information (%s nodes)" %
2953                 len(self.my_node_names))
2954     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2955                                      self.my_inst_info)
2956
2957     feedback_fn("* Verifying configuration file consistency")
2958
2959     # If not all nodes are being checked, we need to make sure the master node
2960     # and a non-checked vm_capable node are in the list.
2961     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2962     if absent_nodes:
2963       vf_nvinfo = all_nvinfo.copy()
2964       vf_node_info = list(self.my_node_info.values())
2965       additional_nodes = []
2966       if master_node not in self.my_node_info:
2967         additional_nodes.append(master_node)
2968         vf_node_info.append(self.all_node_info[master_node])
2969       # Add the first vm_capable node we find which is not included
2970       for node in absent_nodes:
2971         nodeinfo = self.all_node_info[node]
2972         if nodeinfo.vm_capable and not nodeinfo.offline:
2973           additional_nodes.append(node)
2974           vf_node_info.append(self.all_node_info[node])
2975           break
2976       key = constants.NV_FILELIST
2977       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2978                                                  {key: node_verify_param[key]},
2979                                                  self.cfg.GetClusterName()))
2980     else:
2981       vf_nvinfo = all_nvinfo
2982       vf_node_info = self.my_node_info.values()
2983
2984     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2985
2986     feedback_fn("* Verifying node status")
2987
2988     refos_img = None
2989
2990     for node_i in node_data_list:
2991       node = node_i.name
2992       nimg = node_image[node]
2993
2994       if node_i.offline:
2995         if verbose:
2996           feedback_fn("* Skipping offline node %s" % (node,))
2997         n_offline += 1
2998         continue
2999
3000       if node == master_node:
3001         ntype = "master"
3002       elif node_i.master_candidate:
3003         ntype = "master candidate"
3004       elif node_i.drained:
3005         ntype = "drained"
3006         n_drained += 1
3007       else:
3008         ntype = "regular"
3009       if verbose:
3010         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3011
3012       msg = all_nvinfo[node].fail_msg
3013       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3014                msg)
3015       if msg:
3016         nimg.rpc_fail = True
3017         continue
3018
3019       nresult = all_nvinfo[node].payload
3020
3021       nimg.call_ok = self._VerifyNode(node_i, nresult)
3022       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3023       self._VerifyNodeNetwork(node_i, nresult)
3024       self._VerifyNodeUserScripts(node_i, nresult)
3025       self._VerifyOob(node_i, nresult)
3026
3027       if nimg.vm_capable:
3028         self._VerifyNodeLVM(node_i, nresult, vg_name)
3029         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3030                              all_drbd_map)
3031
3032         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3033         self._UpdateNodeInstances(node_i, nresult, nimg)
3034         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3035         self._UpdateNodeOS(node_i, nresult, nimg)
3036
3037         if not nimg.os_fail:
3038           if refos_img is None:
3039             refos_img = nimg
3040           self._VerifyNodeOS(node_i, nimg, refos_img)
3041         self._VerifyNodeBridges(node_i, nresult, bridges)
3042
3043         # Check whether all running instancies are primary for the node. (This
3044         # can no longer be done from _VerifyInstance below, since some of the
3045         # wrong instances could be from other node groups.)
3046         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3047
3048         for inst in non_primary_inst:
3049           # FIXME: investigate best way to handle offline insts
3050           if inst.admin_state == constants.ADMINST_OFFLINE:
3051             if verbose:
3052               feedback_fn("* Skipping offline instance %s" % inst.name)
3053             i_offline += 1
3054             continue
3055           test = inst in self.all_inst_info
3056           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3057                    "instance should not run on node %s", node_i.name)
3058           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3059                    "node is running unknown instance %s", inst)
3060
3061     for node, result in extra_lv_nvinfo.items():
3062       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3063                               node_image[node], vg_name)
3064
3065     feedback_fn("* Verifying instance status")
3066     for instance in self.my_inst_names:
3067       if verbose:
3068         feedback_fn("* Verifying instance %s" % instance)
3069       inst_config = self.my_inst_info[instance]
3070       self._VerifyInstance(instance, inst_config, node_image,
3071                            instdisk[instance])
3072       inst_nodes_offline = []
3073
3074       pnode = inst_config.primary_node
3075       pnode_img = node_image[pnode]
3076       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3077                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3078                " primary node failed", instance)
3079
3080       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3081                pnode_img.offline,
3082                constants.CV_EINSTANCEBADNODE, instance,
3083                "instance is marked as running and lives on offline node %s",
3084                inst_config.primary_node)
3085
3086       # If the instance is non-redundant we cannot survive losing its primary
3087       # node, so we are not N+1 compliant. On the other hand we have no disk
3088       # templates with more than one secondary so that situation is not well
3089       # supported either.
3090       # FIXME: does not support file-backed instances
3091       if not inst_config.secondary_nodes:
3092         i_non_redundant.append(instance)
3093
3094       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3095                constants.CV_EINSTANCELAYOUT,
3096                instance, "instance has multiple secondary nodes: %s",
3097                utils.CommaJoin(inst_config.secondary_nodes),
3098                code=self.ETYPE_WARNING)
3099
3100       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3101         pnode = inst_config.primary_node
3102         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3103         instance_groups = {}
3104
3105         for node in instance_nodes:
3106           instance_groups.setdefault(self.all_node_info[node].group,
3107                                      []).append(node)
3108
3109         pretty_list = [
3110           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3111           # Sort so that we always list the primary node first.
3112           for group, nodes in sorted(instance_groups.items(),
3113                                      key=lambda (_, nodes): pnode in nodes,
3114                                      reverse=True)]
3115
3116         self._ErrorIf(len(instance_groups) > 1,
3117                       constants.CV_EINSTANCESPLITGROUPS,
3118                       instance, "instance has primary and secondary nodes in"
3119                       " different groups: %s", utils.CommaJoin(pretty_list),
3120                       code=self.ETYPE_WARNING)
3121
3122       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3123         i_non_a_balanced.append(instance)
3124
3125       for snode in inst_config.secondary_nodes:
3126         s_img = node_image[snode]
3127         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3128                  snode, "instance %s, connection to secondary node failed",
3129                  instance)
3130
3131         if s_img.offline:
3132           inst_nodes_offline.append(snode)
3133
3134       # warn that the instance lives on offline nodes
3135       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3136                "instance has offline secondary node(s) %s",
3137                utils.CommaJoin(inst_nodes_offline))
3138       # ... or ghost/non-vm_capable nodes
3139       for node in inst_config.all_nodes:
3140         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3141                  instance, "instance lives on ghost node %s", node)
3142         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3143                  instance, "instance lives on non-vm_capable node %s", node)
3144
3145     feedback_fn("* Verifying orphan volumes")
3146     reserved = utils.FieldSet(*cluster.reserved_lvs)
3147
3148     # We will get spurious "unknown volume" warnings if any node of this group
3149     # is secondary for an instance whose primary is in another group. To avoid
3150     # them, we find these instances and add their volumes to node_vol_should.
3151     for inst in self.all_inst_info.values():
3152       for secondary in inst.secondary_nodes:
3153         if (secondary in self.my_node_info
3154             and inst.name not in self.my_inst_info):
3155           inst.MapLVsByNode(node_vol_should)
3156           break
3157
3158     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3159
3160     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3161       feedback_fn("* Verifying N+1 Memory redundancy")
3162       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3163
3164     feedback_fn("* Other Notes")
3165     if i_non_redundant:
3166       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3167                   % len(i_non_redundant))
3168
3169     if i_non_a_balanced:
3170       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3171                   % len(i_non_a_balanced))
3172
3173     if i_offline:
3174       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3175
3176     if n_offline:
3177       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3178
3179     if n_drained:
3180       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3181
3182     return not self.bad
3183
3184   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3185     """Analyze the post-hooks' result
3186
3187     This method analyses the hook result, handles it, and sends some
3188     nicely-formatted feedback back to the user.
3189
3190     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3191         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3192     @param hooks_results: the results of the multi-node hooks rpc call
3193     @param feedback_fn: function used send feedback back to the caller
3194     @param lu_result: previous Exec result
3195     @return: the new Exec result, based on the previous result
3196         and hook results
3197
3198     """
3199     # We only really run POST phase hooks, only for non-empty groups,
3200     # and are only interested in their results
3201     if not self.my_node_names:
3202       # empty node group
3203       pass
3204     elif phase == constants.HOOKS_PHASE_POST:
3205       # Used to change hooks' output to proper indentation
3206       feedback_fn("* Hooks Results")
3207       assert hooks_results, "invalid result from hooks"
3208
3209       for node_name in hooks_results:
3210         res = hooks_results[node_name]
3211         msg = res.fail_msg
3212         test = msg and not res.offline
3213         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3214                       "Communication failure in hooks execution: %s", msg)
3215         if res.offline or msg:
3216           # No need to investigate payload if node is offline or gave
3217           # an error.
3218           continue
3219         for script, hkr, output in res.payload:
3220           test = hkr == constants.HKR_FAIL
3221           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3222                         "Script %s failed, output:", script)
3223           if test:
3224             output = self._HOOKS_INDENT_RE.sub("      ", output)
3225             feedback_fn("%s" % output)
3226             lu_result = False
3227
3228     return lu_result
3229
3230
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The verification itself is not done here; instead one
  L{opcodes.OpGroupVerifyDisks} job is submitted for each node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks on all node groups.

    The sharing mode of the locks is whatever L{_ShareAll} returns.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Submit one disk verification job per node group.

    @param feedback_fn: function used to send feedback back to the caller
        (unused here)
    @return: a L{ResultWithJobs} object holding one single-opcode job for
        every node group lock owned by this LU

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      # Each job consists of exactly one L{opcodes.OpGroupVerifyDisks}
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3249
3250
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and set up the (still empty) lock lists.

    The instance, node group and node lock lists are filled in by
    L{DeclareLocks}.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Compute the locks needed at the given locking level.

    @param level: the locking level locks are being declared for

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      wanted_groups = set([self.group_uuid])
      # Lock all groups used by instances optimistically; this requires
      # going via the node before it's locked, requiring verification
      # later on
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Verify that the optimistically acquired locks are still correct.

    As a side effect the information about all locked instances is
    stored in C{self.instances}, indexed by instance name.

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))

    # Check if node groups for locked instances are still correct
    for (name, instance) in self.instances.items():
      assert locked_nodes.issuperset(instance.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % name

      groups = _CheckInstanceNodeGroups(self.cfg, name, locked_groups)

      assert self.group_uuid in groups, \
        "Instance %s has no node in group %s" % (name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances whose admin state is L{constants.ADMINST_UP} are
    taken into account.

    @rtype: tuple of three items
    @return: a tuple of (dict mapping node to the error message received
        while listing its LVs, list of instances which need
        activate-disks, dict mapping instance to a list of
        C{[node, volume]} pairs for missing volumes)

    """
    running = [inst for inst in self.instances.values()
               if inst.admin_state == constants.ADMINST_UP]
    nv_dict = _MapInstanceDisksToNodes(running)

    if not nv_dict:
      # No disks to look for, hence nothing to report
      return ({}, [], {})

    node_errors = {}
    need_activation = set()
    missing = {}

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(locked_nodes & vm_capable)

    for (node, node_res) in self.rpc.call_lv_list(nodes, []).items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        node_errors[node] = msg
        continue

      for (lv_name, (_, _, lv_online)) in node_res.payload.items():
        # Every LV found is removed from the map of expected LVs
        inst = nv_dict.pop((node, lv_name), None)
        if inst is not None and not lv_online:
          need_activation.add(inst)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
    for (key, inst) in nv_dict.items():
      missing.setdefault(inst, []).append(list(key))

    return (node_errors, list(need_activation), missing)
3368
3369
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The size recorded in the configuration for each disk is compared with
  the size reported by the instance's primary node; mismatches are
  corrected in the configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance and node resource locks.

    If instances were given in the opcode only those (and, later on,
    their primary nodes) are locked, otherwise everything is locked.

    """
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      instance_locks = self.wanted_names
      # Filled in by DeclareLocks once the instance locks are held
      node_res_locks = []
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      instance_locks = locking.ALL_SET
      node_res_locks = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE_RES: node_res_locks,
      locking.LEVEL_INSTANCE: instance_locks,
      }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    """Compute the node resource locks for explicitly named instances.

    @param level: the locking level locks are being declared for

    """
    if self.wanted_names is None:
      # All locks were already requested in ExpandNames
      return

    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    instance_info = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = map(compat.snd, instance_info)

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: a true value if the size of any (recursively visited) child
        was changed, a false value otherwise

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function used to send feedback back to the caller
    @return: list of C{(instance name, disk index, size)} tuples, one for
        each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for (idx, disk) in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for (node, disk_infos) in per_node_disks.items():
      # The RPC call works on copies which have their ID set for the node
      disk_copies = [entry[2].Copy() for entry in disk_infos]
      for dcopy in disk_copies:
        self.cfg.SetDiskID(dcopy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      if len(result.payload) != len(disk_infos):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(disk_infos),
                        result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(disk_infos, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # The recorded size is compared against the reported value shifted
        # right by 20 bits (presumably bytes to mebibytes)
        actual = size >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3490
3491
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the current cluster name as C{OP_TARGET} and the
        requested name as C{NEW_NAME}

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (list containing only the master node, list of all
        nodes)

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = self.cfg.GetNodeList()
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, the resulting IP address is stored in
    C{self.ip} and C{self.op.name} is replaced by the resolved name.

    @raise errors.OpPrereqError: if neither name nor IP address change,
        or if a changed IP address already answers on the node daemon
        port

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = (new_ip != old_ip)

    if not ip_changed and new_name == old_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new IP address must not be in use already
    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master IP is deactivated, the configuration and the known hosts
    file are updated and distributed, and finally the master IP is
    activated again using the new address.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    master_name = master_params.name
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)

      # ... and copy it to all online nodes except the master itself
      other_nodes = self.cfg.GetOnlineNodeList()
      if master_params.name in other_nodes:
        other_nodes.remove(master_params.name)
      _UploadHelper(self, other_nodes, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The master IP is brought up again even if the steps above failed
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        result.fail_msg)

    return new_name
3575
3576
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class matching the primary
  IP family of the cluster.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails; the error is
      classified as L{errors.ECODE_INVAL}

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # Always pass an error code, like all other prerequisite errors do
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
3596
3597
3598 class LUClusterSetParams(LogicalUnit):
3599   """Change the parameters of the cluster.
3600
3601   """
3602   HPATH = "cluster-modify"
3603   HTYPE = constants.HTYPE_CLUSTER
3604   REQ_BGL = False
3605
3606   def CheckArguments(self):
3607     """Check parameters
3608
3609     """
3610     if self.op.uid_pool:
3611       uidpool.CheckUidPool(self.op.uid_pool)
3612
3613     if self.op.add_uids:
3614       uidpool.CheckUidPool(self.op.add_uids)
3615
3616     if self.op.remove_uids:
3617       uidpool.CheckUidPool(self.op.remove_uids)
3618
3619     if self.op.master_netmask is not None:
3620       _ValidateNetmask(self.cfg, self.op.master_netmask)
3621
3622     if self.op.diskparams:
3623       for dt_params in self.op.diskparams.values():
3624         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3625
3626   def ExpandNames(self):
3627     # FIXME: in the future maybe other cluster params won't require checking on
3628     # all nodes to be modified.
3629     self.needed_locks = {
3630       locking.LEVEL_NODE: locking.ALL_SET,
3631     }
3632     self.share_locks[locking.LEVEL_NODE] = 1
3633
3634   def BuildHooksEnv(self):
3635     """Build hooks env.
3636
3637     """
3638     return {
3639       "OP_TARGET": self.cfg.GetClusterName(),
3640       "NEW_VG_NAME": self.op.vg_name,
3641       }
3642
3643   def BuildHooksNodes(self):
3644     """Build hooks nodes.
3645
3646     """
3647     mn = self.cfg.GetMasterNode()
3648     return ([mn], [mn])
3649
3650   def CheckPrereq(self):
3651     """Check prerequisites.
3652
3653     This checks whether the given params don't conflict and
3654     if the given volume group is valid.
3655
3656     """
3657     if self.op.vg_name is not None and not self.op.vg_name:
3658       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3659         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3660                                    " instances exist", errors.ECODE_INVAL)
3661
3662     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3663       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3664         raise errors.OpPrereqError("Cannot disable drbd helper while"
3665                                    " drbd-based instances exist",
3666                                    errors.ECODE_INVAL)
3667
3668     node_list = self.owned_locks(locking.LEVEL_NODE)
3669
3670     # if vg_name not None, checks given volume group on all nodes
3671     if self.op.vg_name:
3672       vglist = self.rpc.call_vg_list(node_list)
3673       for node in node_list:
3674         msg = vglist[node].fail_msg
3675         if msg:
3676           # ignoring down node
3677           self.LogWarning("Error while gathering data on node %s"
3678                           " (ignoring node): %s", node, msg)
3679           continue
3680         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3681                                               self.op.vg_name,
3682                                               constants.MIN_VG_SIZE)
3683         if vgstatus:
3684           raise errors.OpPrereqError("Error on node '%s': %s" %
3685                                      (node, vgstatus), errors.ECODE_ENVIRON)
3686
3687     if self.op.drbd_helper:
3688       # checks given drbd helper on all nodes
3689       helpers = self.rpc.call_drbd_helper(node_list)
3690       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3691         if ninfo.offline:
3692           self.LogInfo("Not checking drbd helper on offline node %s", node)
3693           continue
3694         msg = helpers[node].fail_msg
3695         if msg:
3696           raise errors.OpPrereqError("Error checking drbd helper on node"
3697                                      " '%s': %s" % (node, msg),
3698                                      errors.ECODE_ENVIRON)
3699         node_helper = helpers[node].payload
3700         if node_helper != self.op.drbd_helper:
3701           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3702                                      (node, node_helper), errors.ECODE_ENVIRON)
3703
3704     self.cluster = cluster = self.cfg.GetClusterInfo()
3705     # validate params changes
3706     if self.op.beparams:
3707       objects.UpgradeBeParams(self.op.beparams)
3708       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3709       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3710
3711     if self.op.ndparams:
3712       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3713       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3714
3715       # TODO: we need a more general way to handle resetting
3716       # cluster-level parameters to default values
3717       if self.new_ndparams["oob_program"] == "":
3718         self.new_ndparams["oob_program"] = \
3719             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3720
3721     if self.op.hv_state:
3722       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3723                                             self.cluster.hv_state_static)
3724       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3725                                for hv, values in new_hv_state.items())
3726
3727     if self.op.disk_state:
3728       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3729                                                 self.cluster.disk_state_static)
3730       self.new_disk_state = \
3731         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3732                             for name, values in svalues.items()))
3733              for storage, svalues in new_disk_state.items())
3734
3735     if self.op.ipolicy:
3736       ipolicy = {}
3737       for key, value in self.op.ipolicy.items():
3738         utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3739         ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3740                                           value)
3741       objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3742       self.new_ipolicy = ipolicy
3743
3744     if self.op.nicparams:
3745       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3746       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3747       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3748       nic_errors = []
3749
3750       # check all instances for consistency
3751       for instance in self.cfg.GetAllInstancesInfo().values():
3752         for nic_idx, nic in enumerate(instance.nics):
3753           params_copy = copy.deepcopy(nic.nicparams)
3754           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3755
3756           # check parameter syntax
3757           try:
3758             objects.NIC.CheckParameterSyntax(params_filled)
3759           except errors.ConfigurationError, err:
3760             nic_errors.append("Instance %s, nic/%d: %s" %
3761                               (instance.name, nic_idx, err))
3762
3763           # if we're moving instances to routed, check that they have an ip
3764           target_mode = params_filled[constants.NIC_MODE]
3765           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3766             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3767                               " address" % (instance.name, nic_idx))
3768       if nic_errors:
3769         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3770                                    "\n".join(nic_errors))
3771
3772     # hypervisor list/parameters
3773     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3774     if self.op.hvparams:
3775       for hv_name, hv_dict in self.op.hvparams.items():
3776         if hv_name not in self.new_hvparams:
3777           self.new_hvparams[hv_name] = hv_dict
3778         else:
3779           self.new_hvparams[hv_name].update(hv_dict)
3780
3781     # disk template parameters
3782     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3783     if self.op.diskparams:
3784       for dt_name, dt_params in self.op.diskparams.items():
3785         if dt_name not in self.op.diskparams:
3786           self.new_diskparams[dt_name] = dt_params
3787         else:
3788           self.new_diskparams[dt_name].update(dt_params)
3789
3790     # os hypervisor parameters
3791     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3792     if self.op.os_hvp:
3793       for os_name, hvs in self.op.os_hvp.items():
3794         if os_name not in self.new_os_hvp:
3795           self.new_os_hvp[os_name] = hvs
3796         else:
3797           for hv_name, hv_dict in hvs.items():
3798             if hv_name not in self.new_os_hvp[os_name]:
3799               self.new_os_hvp[os_name][hv_name] = hv_dict
3800             else:
3801               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3802
3803     # os parameters
3804     self.new_osp = objects.FillDict(cluster.osparams, {})
3805     if self.op.osparams:
3806       for os_name, osp in self.op.osparams.items():
3807         if os_name not in self.new_osp:
3808           self.new_osp[os_name] = {}
3809
3810         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3811                                                   use_none=True)
3812
3813         if not self.new_osp[os_name]:
3814           # we removed all parameters
3815           del self.new_osp[os_name]
3816         else:
3817           # check the parameter validity (remote check)
3818           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3819                          os_name, self.new_osp[os_name])
3820
3821     # changes to the hypervisor list
3822     if self.op.enabled_hypervisors is not None:
3823       self.hv_list = self.op.enabled_hypervisors
3824       for hv in self.hv_list:
3825         # if the hypervisor doesn't already exist in the cluster
3826         # hvparams, we initialize it to empty, and then (in both
3827         # cases) we make sure to fill the defaults, as we might not
3828         # have a complete defaults list if the hypervisor wasn't
3829         # enabled before
3830         if hv not in new_hvp:
3831           new_hvp[hv] = {}
3832         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3833         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3834     else:
3835       self.hv_list = cluster.enabled_hypervisors
3836
3837     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3838       # either the enabled list has changed, or the parameters have, validate
3839       for hv_name, hv_params in self.new_hvparams.items():
3840         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3841             (self.op.enabled_hypervisors and
3842              hv_name in self.op.enabled_hypervisors)):
3843           # either this is a new hypervisor, or its parameters have changed
3844           hv_class = hypervisor.GetHypervisor(hv_name)
3845           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3846           hv_class.CheckParameterSyntax(hv_params)
3847           _CheckHVParams(self, node_list, hv_name, hv_params)
3848
3849     if self.op.os_hvp:
3850       # no need to check any newly-enabled hypervisors, since the
3851       # defaults have already been checked in the above code-block
3852       for os_name, os_hvp in self.new_os_hvp.items():
3853         for hv_name, hv_params in os_hvp.items():
3854           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3855           # we need to fill in the new os_hvp on top of the actual hv_p
3856           cluster_defaults = self.new_hvparams.get(hv_name, {})
3857           new_osp = objects.FillDict(cluster_defaults, hv_params)
3858           hv_class = hypervisor.GetHypervisor(hv_name)
3859           hv_class.CheckParameterSyntax(new_osp)
3860           _CheckHVParams(self, node_list, hv_name, new_osp)
3861
3862     if self.op.default_iallocator:
3863       alloc_script = utils.FindFile(self.op.default_iallocator,
3864                                     constants.IALLOCATOR_SEARCH_PATH,
3865                                     os.path.isfile)
3866       if alloc_script is None:
3867         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3868                                    " specified" % self.op.default_iallocator,
3869                                    errors.ECODE_INVAL)
3870
3871   def Exec(self, feedback_fn):
3872     """Change the parameters of the cluster.
3873
3874     """
3875     if self.op.vg_name is not None:
3876       new_volume = self.op.vg_name
3877       if not new_volume:
3878         new_volume = None
3879       if new_volume != self.cfg.GetVGName():
3880         self.cfg.SetVGName(new_volume)
3881       else:
3882         feedback_fn("Cluster LVM configuration already in desired"
3883                     " state, not changing")
3884     if self.op.drbd_helper is not None:
3885       new_helper = self.op.drbd_helper
3886       if not new_helper:
3887         new_helper = None
3888       if new_helper != self.cfg.GetDRBDHelper():
3889         self.cfg.SetDRBDHelper(new_helper)
3890       else:
3891         feedback_fn("Cluster DRBD helper already in desired state,"
3892                     " not changing")
3893     if self.op.hvparams:
3894       self.cluster.hvparams = self.new_hvparams
3895     if self.op.os_hvp:
3896       self.cluster.os_hvp = self.new_os_hvp
3897     if self.op.enabled_hypervisors is not None:
3898       self.cluster.hvparams = self.new_hvparams
3899       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3900     if self.op.beparams:
3901       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3902     if self.op.nicparams:
3903       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3904     if self.op.ipolicy:
3905       self.cluster.ipolicy = self.new_ipolicy
3906     if self.op.osparams:
3907       self.cluster.osparams = self.new_osp
3908     if self.op.ndparams:
3909       self.cluster.ndparams = self.new_ndparams
3910     if self.op.diskparams:
3911       self.cluster.diskparams = self.new_diskparams
3912     if self.op.hv_state:
3913       self.cluster.hv_state_static = self.new_hv_state
3914     if self.op.disk_state:
3915       self.cluster.disk_state_static = self.new_disk_state
3916
3917     if self.op.candidate_pool_size is not None:
3918       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3919       # we need to update the pool size here, otherwise the save will fail
3920       _AdjustCandidatePool(self, [])
3921
3922     if self.op.maintain_node_health is not None:
3923       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3924         feedback_fn("Note: CONFD was disabled at build time, node health"
3925                     " maintenance is not useful (still enabling it)")
3926       self.cluster.maintain_node_health = self.op.maintain_node_health
3927
3928     if self.op.prealloc_wipe_disks is not None:
3929       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3930
3931     if self.op.add_uids is not None:
3932       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3933
3934     if self.op.remove_uids is not None:
3935       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3936
3937     if self.op.uid_pool is not None:
3938       self.cluster.uid_pool = self.op.uid_pool
3939
3940     if self.op.default_iallocator is not None:
3941       self.cluster.default_iallocator = self.op.default_iallocator
3942
3943     if self.op.reserved_lvs is not None:
3944       self.cluster.reserved_lvs = self.op.reserved_lvs
3945
3946     if self.op.use_external_mip_script is not None:
3947       self.cluster.use_external_mip_script = self.op.use_external_mip_script
3948
3949     def helper_os(aname, mods, desc):
3950       desc += " OS list"
3951       lst = getattr(self.cluster, aname)
3952       for key, val in mods:
3953         if key == constants.DDM_ADD:
3954           if val in lst:
3955             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3956           else:
3957             lst.append(val)
3958         elif key == constants.DDM_REMOVE:
3959           if val in lst:
3960             lst.remove(val)
3961           else:
3962             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3963         else:
3964           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3965
3966     if self.op.hidden_os:
3967       helper_os("hidden_os", self.op.hidden_os, "hidden")
3968
3969     if self.op.blacklisted_os:
3970       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3971
3972     if self.op.master_netdev:
3973       master_params = self.cfg.GetMasterNetworkParameters()
3974       ems = self.cfg.GetUseExternalMipScript()
3975       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3976                   self.cluster.master_netdev)
3977       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3978                                                        master_params, ems)
3979       result.Raise("Could not disable the master ip")
3980       feedback_fn("Changing master_netdev from %s to %s" %
3981                   (master_params.netdev, self.op.master_netdev))
3982       self.cluster.master_netdev = self.op.master_netdev
3983
3984     if self.op.master_netmask:
3985       master_params = self.cfg.GetMasterNetworkParameters()
3986       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3987       result = self.rpc.call_node_change_master_netmask(master_params.name,
3988                                                         master_params.netmask,
3989                                                         self.op.master_netmask,
3990                                                         master_params.ip,
3991                                                         master_params.netdev)
3992       if result.fail_msg:
3993         msg = "Could not change the master IP netmask: %s" % result.fail_msg
3994         feedback_fn(msg)
3995
3996       self.cluster.master_netmask = self.op.master_netmask
3997
3998     self.cfg.Update(self.cluster, feedback_fn)
3999
4000     if self.op.master_netdev:
4001       master_params = self.cfg.GetMasterNetworkParameters()
4002       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4003                   self.op.master_netdev)
4004       ems = self.cfg.GetUseExternalMipScript()
4005       result = self.rpc.call_node_activate_master_ip(master_params.name,
4006                                                      master_params, ems)
4007       if result.fail_msg:
4008         self.LogWarning("Could not re-enable the master ip on"
4009                         " the master, please restart manually: %s",
4010                         result.fail_msg)
4011
4012
def _UploadHelper(lu, nodes, fname):
  """Upload a single file to some nodes, warning about failed copies.

  If the file does not exist locally, nothing is uploaded and no warning
  is shown.

  @param lu: calling logical unit; its RPC runner does the upload and its
      processor is used to log warnings
  @param nodes: the nodes to upload to, as passed to the upload RPC
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for (node_name, node_result) in upload_results.items():
    failure = node_result.fail_msg
    if not failure:
      continue
    # A failed copy is only reported, it never aborts the operation
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, failure))
4025
4026
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts} and
      C{enabled_hypervisors} attributes are read
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of file names: (files for all nodes, optional files,
      files for master candidates only, files for VM-capable nodes only)

  """
  # Files which must be present on all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)
  else:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([constants.RAPI_USERS_FILE])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes; every enabled
  # hypervisor reports its required files first and its optional ones second
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
    files_vm.update(hv_files[0])
    files_opt.update(hv_files[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          len(files_all) + len(files_mc) + len(files_vm)), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert files_opt.issubset(all_files_set), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
4091
4092
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the ancillary cluster files to the nodes which need them.

  The config and ssconf files are distributed by ConfigWriter; this function
  handles the remaining files which have to be present on the nodes. The
  master node is never a target of the distribution.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Gather target nodes
  cluster = cfg.GetClusterInfo()
  master_info = cfg.GetNodeInfo(cfg.GetMasterNode())

  targets_all = cfg.GetOnlineNodeList()
  targets_vm = cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    targets_all.extend(additional_nodes)
    if additional_vm:
      targets_vm.extend(additional_nodes)

  # Never distribute to master node
  master_name = master_info.name
  if master_name in targets_all:
    targets_all.remove(master_name)
  if master_name in targets_vm:
    targets_vm.remove(master_name)

  # Gather file lists (the optional files are not needed here)
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload the files: first those for all nodes, then the VM-only ones
  for fname in files_all:
    _UploadHelper(lu, targets_all, fname)

  for fname in files_vm:
    _UploadHelper(lu, targets_vm, fname)
4141
4142
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU re-saves the cluster object through the configuration and then
  redistributes the ancillary files.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback back to the caller

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4163
4164
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The RPC is sent to the node named in the master network parameters; a
    failed call is turned into an exception by the RPC result.

    """
    cfg = self.cfg
    params = cfg.GetMasterNetworkParameters()
    use_external_script = cfg.GetUseExternalMipScript()

    activation = self.rpc.call_node_activate_master_ip(params.name, params,
                                                       use_external_script)
    activation.Raise("Could not activate the master IP")
4178
4179
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The RPC is sent to the node named in the master network parameters; a
    failed call is turned into an exception by the RPC result.

    """
    cfg = self.cfg
    params = cfg.GetMasterNetworkParameters()
    use_external_script = cfg.GetUseExternalMipScript()

    deactivation = self.rpc.call_node_deactivate_master_ip(params.name, params,
                                                           use_external_script)
    deactivation.Raise("Could not deactivate the master IP")
4193
4194
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary node
  until no disk reports a sync in progress anymore.

  @param lu: calling logical unit (provides configuration, RPC and logging)
  @param instance: the instance whose disks are checked
  @param disks: the disks to check, expanded through L{_ExpandCheckDisks};
      an explicitly passed empty list means there is nothing to wait for
  @type oneshot: boolean
  @param oneshot: if true, the status is not polled until the sync has
      finished; the query is only repeated while the disks look degraded
  @rtype: boolean
  @return: False if, in the last status query, a disk was degraded without
      a sync being in progress; True otherwise (also if there was nothing
      to check)
  @raise errors.RemoteError: if the primary node failed to return mirror
      data ten times in a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful call resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # a degraded disk which is still syncing is not counted as degraded
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # keep the longest estimate over all disks; a plain assignment
          # would make the sleep time depend on the order of the disks
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the slowest disk, but poll at least once per minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
4268
4269
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The device is looked up on the given node if C{on_primary} is set or if
  the device reports that it is assembled on secondary nodes. Its children
  are then checked recursively, always using the C{is_degraded} test.

  @param lu: calling logical unit
  @param dev: the disk to check
  @param node: the node on which the disk is checked
  @param on_primary: whether the node is the primary node of the disk's
      instance
  @param ldisk: if True, test the local storage status (C{ldisk_status})
      of the device instead of its C{is_degraded} attribute, which
      represents the overall non-ok status of the device(s)
  @rtype: boolean
  @return: whether the device and all its children passed the check

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    failure = found.fail_msg
    if failure:
      lu.LogWarning("Can't find disk on node %s: %s", node, failure)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  if dev.children:
    for child in dev.children:
      if not consistent:
        # once a failure was seen the remaining children are not queried
        break
      consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
4302
4303
4304 class LUOobCommand(NoHooksLU):
4305   """Logical unit for OOB handling.
4306
4307   """
4308   REG_BGL = False
4309   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4310
4311   def ExpandNames(self):
4312     """Gather locks we need.
4313
4314     """
4315     if self.op.node_names:
4316       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4317       lock_names = self.op.node_names
4318     else:
4319       lock_names = locking.ALL_SET
4320
4321     self.needed_locks = {
4322       locking.LEVEL_NODE: lock_names,
4323       }
4324
4325   def CheckPrereq(self):
4326     """Check prerequisites.
4327
4328     This checks:
4329      - the node exists in the configuration
4330      - OOB is supported
4331
4332     Any errors are signaled by raising errors.OpPrereqError.
4333
4334     """
4335     self.nodes = []
4336     self.master_node = self.cfg.GetMasterNode()
4337
4338     assert self.op.power_delay >= 0.0
4339
4340     if self.op.node_names:
4341       if (self.op.command in self._SKIP_MASTER and
4342           self.master_node in self.op.node_names):
4343         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4344         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4345
4346         if master_oob_handler:
4347           additional_text = ("run '%s %s %s' if you want to operate on the"
4348                              " master regardless") % (master_oob_handler,
4349                                                       self.op.command,
4350                                                       self.master_node)
4351         else:
4352           additional_text = "it does not support out-of-band operations"
4353
4354         raise errors.OpPrereqError(("Operating on the master node %s is not"
4355                                     " allowed for %s; %s") %
4356                                    (self.master_node, self.op.command,
4357                                     additional_text), errors.ECODE_INVAL)
4358     else:
4359       self.op.node_names = self.cfg.GetNodeList()
4360       if self.op.command in self._SKIP_MASTER:
4361         self.op.node_names.remove(self.master_node)
4362
4363     if self.op.command in self._SKIP_MASTER:
4364       assert self.master_node not in self.op.node_names
4365
4366     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4367       if node is None:
4368         raise errors.OpPrereqError("Node %s not found" % node_name,
4369                                    errors.ECODE_NOENT)
4370       else:
4371         self.nodes.append(node)
4372
4373       if (not self.op.ignore_status and
4374           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4375         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4376                                     " not marked offline") % node_name,
4377                                    errors.ECODE_STATE)
4378
4379   def Exec(self, feedback_fn):
4380     """Execute OOB and return result if we expect any.
4381
4382     """
4383     master_node = self.master_node
4384     ret = []
4385
4386     for idx, node in enumerate(utils.NiceSort(self.nodes,
4387                                               key=lambda node: node.name)):
4388       node_entry = [(constants.RS_NORMAL, node.name)]
4389       ret.append(node_entry)
4390
4391       oob_program = _SupportsOob(self.cfg, node)
4392
4393       if not oob_program:
4394         node_entry.append((constants.RS_UNAVAIL, None))
4395         continue
4396
4397       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4398                    self.op.command, oob_program, node.name)
4399       result = self.rpc.call_run_oob(master_node, oob_program,
4400                                      self.op.command, node.name,
4401                                      self.op.timeout)
4402
4403       if result.fail_msg:
4404         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4405                         node.name, result.fail_msg)
4406         node_entry.append((constants.RS_NODATA, None))
4407       else:
4408         try:
4409           self._CheckPayload(result)
4410         except errors.OpExecError, err:
4411           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4412                           node.name, err)
4413           node_entry.append((constants.RS_NODATA, None))
4414         else:
4415           if self.op.command == constants.OOB_HEALTH:
4416             # For health we should log important events
4417             for item, status in result.payload:
4418               if status in [constants.OOB_STATUS_WARNING,
4419                             constants.OOB_STATUS_CRITICAL]:
4420                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4421                                 item, node.name, status)
4422
4423           if self.op.command == constants.OOB_POWER_ON:
4424             node.powered = True
4425           elif self.op.command == constants.OOB_POWER_OFF:
4426             node.powered = False
4427           elif self.op.command == constants.OOB_POWER_STATUS:
4428             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4429             if powered != node.powered:
4430               logging.warning(("Recorded power state (%s) of node '%s' does not"
4431                                " match actual power state (%s)"), node.powered,
4432                               node.name, powered)
4433
4434           # For configuration changing commands we should update the node
4435           if self.op.command in (constants.OOB_POWER_ON,
4436                                  constants.OOB_POWER_OFF):
4437             self.cfg.Update(node, feedback_fn)
4438
4439           node_entry.append((constants.RS_NORMAL, result.payload))
4440
4441           if (self.op.command == constants.OOB_POWER_ON and
4442               idx < len(self.nodes) - 1):
4443             time.sleep(self.op.power_delay)
4444
4445     return ret
4446
4447   def _CheckPayload(self, result):
4448     """Checks if the payload is valid.
4449
4450     @param result: RPC result
4451     @raises errors.OpExecError: If payload is not valid
4452
4453     """
4454     errs = []
4455     if self.op.command == constants.OOB_HEALTH:
4456       if not isinstance(result.payload, list):
4457         errs.append("command 'health' is expected to return a list but got %s" %
4458                     type(result.payload))
4459       else:
4460         for item, status in result.payload:
4461           if status not in constants.OOB_STATUSES:
4462             errs.append("health item '%s' has invalid status '%s'" %
4463                         (item, status))
4464
4465     if self.op.command == constants.OOB_POWER_STATUS:
4466       if not isinstance(result.payload, dict):
4467         errs.append("power-status is expected to return a dict but got %s" %
4468                     type(result.payload))
4469
4470     if self.op.command in [
4471         constants.OOB_POWER_ON,
4472         constants.OOB_POWER_OFF,
4473         constants.OOB_POWER_CYCLE,
4474         ]:
4475       if result.payload is not None:
4476         errs.append("%s is expected to not return payload but got '%s'" %
4477                     (self.op.command, result.payload))
4478
4479     if errs:
4480       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4481                                utils.CommaJoin(errs))
4482
4483
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Set up the query; no locks are requested for the time being.

    """
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = self.names

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Nothing to declare, as no locks are used.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and the per-node RPC results
        (whose payload lists the OSes found on the node) as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of tuples of (path, status,
        diagnose, variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api",
                                      [], [], [])],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    # Only nodes which answered at RPC level get an entry for each OS, so
    # that nodes with a non-responding node daemon don't make all OSes
    # invalid
    responsive = [node_name for (node_name, node_result) in rlist.items()
                  if not node_result.fail_msg]

    by_os = {}
    for (node_name, node_result) in rlist.items():
      if node_result.fail_msg or not node_result.payload:
        continue

      for os_entry in node_result.payload:
        (os_name, path, status, diagnose, variants,
         params, api_versions) = os_entry

        if os_name not in by_os:
          # start with an empty list for each of the responsive nodes
          by_os[os_name] = dict((nname, []) for nname in responsive)

        # convert params from [name, help] to (name, help)
        param_tuples = [tuple(param) for param in params]
        by_os[os_name][node_name].append((path, status, diagnose, variants,
                                          param_tuples, api_versions))

    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    All online, vm-capable nodes are asked for their OS definitions. An OS
    is valid only if the first entry found on every node is valid; its
    variants, parameters and API versions are those common to all nodes.

    @rtype: list
    @return: one L{query.OsInfo} object per OS, in the order given by
        C{_GetNames}

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = []
    for node in lu.cfg.GetAllNodesInfo().values():
      if node.vm_capable and not node.offline:
        valid_nodes.append(node.name)

    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      is_hidden = os_name in cluster.hidden_os
      is_blacklisted = os_name in cluster.blacklisted_os
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=is_hidden, blacklisted=is_blacklisted)

      variants = set()
      parameters = set()
      api_versions = set()

      for (idx, osl) in enumerate(os_data.values()):
        # Only the first entry found on each node is taken into account
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx > 0:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
        else:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered_names if name in data]
4599
4600
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter, the status filter, both combined with a
        logical "and", or None if neither of them applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter

    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]

    return name_filter

  def CheckArguments(self):
    """Create the query object; locking is not used for OS queries.

    """
    os_filter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(os_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegate the name expansion to the query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its result in the old-style format.

    """
    return self.oq.OldStyleQuery(self)
4643
4644
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the list of all nodes except the one being removed, used both
        for the pre and for the post phase

    """
    hook_nodes = self.cfg.GetNodeList()
    if self.op.node_name in hook_nodes:
      hook_nodes.remove(self.op.node_name)
    else:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if self.cfg.GetMasterNode() == node.name:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    # The node must not be used by any instance anymore
    all_instances = self.cfg.GetAllInstancesInfo()
    for (instance_name, instance) in all_instances.items():
      if node.name not in instance.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    node_name = node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      # Problems on the node being removed are only reported
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      hosts_result = \
        self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                       constants.ETC_HOSTS_REMOVE,
                                       node_name, None)
      hosts_result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
4738
4739
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Expand the wanted node names and request locks if needed.

    Node locks are only requested if locking was asked for and live data
    is part of the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    # If any non-static field is requested we need to lock the nodes
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Nothing to declare beyond what ExpandNames requested.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data which were requested are gathered; the others
    are passed on as None (or an empty dictionary for the node groups).

    @rtype: L{query.NodeQueryData}

    """
    cfg = lu.cfg
    requested = self.requested_data

    all_info = cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Live data, gathered via RPC
    live_data = None
    if query.NQ_LIVE in requested:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [cfg.GetVGName()],
                                        [cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # nodes which failed or returned no data are left out
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = _MakeLegacyNodeInfo(nresult.payload)

    # Names of the primary and secondary instances of each node
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # Out-of-band support, computed for all nodes of the cluster
    oob_support = None
    if query.NQ_OOB in requested:
      oob_support = dict((name, bool(_SupportsOob(cfg, node)))
                         for name, node in all_info.iteritems())

    # Node groups
    groups = {}
    if query.NQ_GROUP in requested:
      groups = cfg.GetAllNodeGroupsInfo()

    wanted_nodes = [all_info[name] for name in nodenames]
    return query.NodeQueryData(wanted_nodes, live_data, cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, cfg.GetClusterInfo())
4814
4815
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegate the name expansion to the query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegate the lock declaration to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its result in the old-style format.

    """
    return self.nq.OldStyleQuery(self)
4835
4836
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Checks the selected output fields against the known field sets.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares node locks for the given nodes, or for all nodes if none
    were given.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      wanted_nodes = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted_nodes = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  @staticmethod
  def _VolumeField(node, vol, field, vol2inst):
    """Returns the raw value of one output field for one volume.

    @param node: name of the node the volume was reported by
    @param vol: volume dictionary as returned by the node
    @param field: name of the requested output field
    @param vol2inst: mapping of (node, "vg/name") to the owning instance
    @raise errors.ParameterError: if the field is not known

    """
    if field == "node":
      return node
    if field == "phys":
      return vol["dev"]
    if field == "vg":
      return vol["vg"]
    if field == "name":
      return vol["name"]
    if field == "size":
      return int(float(vol["size"]))
    if field == "instance":
      return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently; nodes whose RPC call failed are
    skipped with a warning.

    @return: one list per volume, holding the stringified values of the
        requested output fields

    """
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    volumes_by_node = self.rpc.call_node_volumes(owned_nodes)

    all_instances = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(all_instances.values())

    by_device = operator.itemgetter("dev")
    wanted_fields = self.op.output_fields

    rows = []
    for node_name in owned_nodes:
      node_result = volumes_by_node[node_name]
      if node_result.offline:
        continue

      err = node_result.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s",
                        node_name, err)
        continue

      # Volumes of a node are listed ordered by their device
      for vol in sorted(node_result.payload, key=by_device):
        rows.append([str(self._VolumeField(node_name, vol, field, vol2inst))
                     for field in wanted_fields])

    return rows
4905
4906
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Checks the selected output fields against the valid storage fields.

    """
    static_fields = self._FIELDS_STATIC
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=static_fields,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares node locks for the given nodes, or for all nodes if none
    were given.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      node_locks = locking.ALL_SET
    else:
      node_locks = _GetWantedNodes(self, self.op.nodes)

    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def _BuildRow(self, node, row, field_idx):
    """Builds the output row for a single storage unit.

    @param node: name of the node the unit was reported by
    @param row: list of values returned by the node for this unit
    @param field_idx: mapping of field name to its index in C{row}
    @raise errors.ParameterError: if a requested field is not known

    """
    out = []
    for field in self.op.output_fields:
      if field == constants.SF_NODE:
        out.append(node)
      elif field == constants.SF_TYPE:
        out.append(self.op.storage_type)
      elif field in field_idx:
        out.append(row[field_idx[field]])
      else:
        raise errors.ParameterError(field)
    return out

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently; nodes whose RPC call failed are
    skipped with a warning.

    @return: one list of field values per storage unit, ordered by node
        and then by unit name

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    wanted = self.op.output_fields

    # The name is always requested since the rows are sorted by it
    if constants.SF_NAME in wanted:
      rpc_fields = wanted[:]
    else:
      rpc_fields = [constants.SF_NAME] + wanted

    # Node and type are only known to the LU, hence never ask for them
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    rpc_fields = [fname for fname in rpc_fields if fname not in lu_only]

    field_idx = dict((fname, pos) for (pos, fname) in enumerate(rpc_fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, rpc_fields)

    result = []
    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      by_name = dict((row[name_idx], row) for row in nresult.payload)
      for unit_name in utils.NiceSort(by_name.keys()):
        result.append(self._BuildRow(node, by_name[unit_name], field_idx))

    return result
4988
4989
class _InstanceQuery(_QueryBase):
  """Query implementation for instances, built on top of L{_QueryBase}.

  The attributes C{names}, C{use_locking} and C{requested_data} read below
  are not set in this class; presumably they are provided by L{_QueryBase}
  (TODO confirm).

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and declares the locks to acquire.

    Locks are only declared if locking was requested and live data
    (L{query.IQ_LIVE}) is part of the requested data.

    @param lu: the logical unit on whose behalf the query is run

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # Group and node locks start out empty and are filled in by DeclareLocks
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Node group locks are only needed when node data was requested too
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Declares the node group and node locks for the owned instances.

    Does nothing unless locking was enabled in L{ExpandNames}.

    @param lu: the logical unit on whose behalf the query is run
    @param level: the locking level for which locks are being declared

    """
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the optimistically acquired node group locks.

    Each owned instance is passed, together with the set of owned groups, to
    L{_CheckInstanceNodeGroups}.

    @param lu: the logical unit holding the locks

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the data selected through C{self.requested_data} is gathered; data
    which was not requested is passed on as C{None} (or as an empty
    dictionary in the case of the live data).

    @param lu: the logical unit on whose behalf the query is run
    @return: a L{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # All nodes used by the selected instances and their distinct hypervisors
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    # Instances reported by a node which is not their configured primary node
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, not
                # only the entry of this instance, so entries of wrong-node
                # or orphan instances reported by the same node end up in
                # live_data as well; presumably consumers rely on this,
                # confirm before narrowing it down
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      # Disk usage is computed from the configured disk sizes only
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      # From here on "nodes" no longer holds the frozenset of node names but
      # the node objects, keyed as returned by GetMultiNodeInfo
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5120
5121
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind of resource (C{self.op.what}) selects the query implementation
  to which all further steps are delegated.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested resource.

    """
    impl_class = _GetQueryImplementation(self.op.what)
    self.impl = impl_class(self.op.qfilter, self.op.fields,
                           self.op.use_locking)

  def ExpandNames(self):
    """Delegates name expansion to the query implementation.

    """
    impl = self.impl
    impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates lock declaration for C{level} to the query implementation.

    """
    impl = self.impl
    impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query through the implementation's C{NewStyleQuery} method.

    """
    impl = self.impl
    return impl.NewStyleQuery(self)
5142
5143
class LUQueryFields(NoHooksLU):
  """Query for the field definitions of resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation class for the requested resource.

    """
    resource = self.op.what
    self.qcls = _GetQueryImplementation(resource)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of L{query.QueryFields} for the selected fields.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
5159
5160
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields or if a field outside of the modifiable set is to be changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    stype = self.op.storage_type

    try:
      allowed = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)

    rejected = set(self.op.changes.keys()) - allowed
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (stype, list(rejected)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declares the lock on the node holding the storage unit.

    """
    locks = {}
    locks[locking.LEVEL_NODE] = self.op.node_name
    self.needed_locks = locks

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    The RPC result's C{Raise} method is called with a failure message
    naming the storage unit and the node.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    rpc_result = self.rpc.call_storage_modify(op.node_name,
                                              op.storage_type, st_args,
                                              op.name, op.changes)
    failure = ("Failed to modify storage unit '%s' on %s" %
               (op.name, op.node_name))
    rpc_result.Raise(failure)
5201
5202
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  The same logical unit also handles re-adding a node which is already part
  of the configuration (C{self.op.readd}).

  @cvar _NFLAGS: names of the capability flags which are defaulted in
      L{CheckPrereq} and copied from the opcode to the node in L{Exec}

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolves the node name and rejects invalid re-add requests.

    The name is resolved using the cluster's primary IP family and the
    resolved name is stored back into the opcode.

    @raise errors.OpPrereqError: if the master node is to be re-added or if
        a node group is passed together with a re-add

    """
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    @return: dictionary with the node's name, its primary and secondary IP
        and its capability flags, the latter converted to strings

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the node being
        added is only part of the post-hook list

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    Besides checking, this also fills in the opcode's primary/secondary IP
    and capability flags and prepares C{self.new_node},
    C{self.master_candidate} and C{self.changed_primary_ip} for L{Exec}.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      # Without an explicit secondary IP the primary one is reused, which is
      # refused for IPv6 primaries as the secondary must be an IPv4 address
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    # Compare the new node's addresses against all existing nodes
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # On re-add the secondary IP must be unchanged, while a changed
        # primary IP is only recorded (and applied in Exec)
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # Unset flags are inherited from the existing node object
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unset flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # A re-added node is passed as an exception to the self-promotion
    # decision below
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # Re-adds modify the existing node object instead of creating a new one
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The node object prepared in L{CheckPrereq} is updated with the final
    flags and parameters, the node's protocol version and ssh/hostname
    reachability are verified and finally the node is added (or re-added)
    to the cluster context.

    @raise errors.OpExecError: on a protocol version mismatch or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    # Only dual-homed nodes need the secondary IP check
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Have the master node run the node list verification against the new
    # node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # A non-empty payload holds one entry per node which failed the check
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          # A failed demotion is only reported, it doesn't abort the re-add
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5476
5477
5478 class LUNodeSetParams(LogicalUnit):
5479   """Modifies the parameters of a node.
5480
5481   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5482       to the node role (as _ROLE_*)
5483   @cvar _R2F: a dictionary from node role to tuples of flags
5484   @cvar _FLAGS: a list of attribute names corresponding to the flags
5485
5486   """
5487   HPATH = "node-modify"
5488   HTYPE = constants.HTYPE_NODE
5489   REQ_BGL = False
5490   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5491   _F2R = {
5492     (True, False, False): _ROLE_CANDIDATE,
5493     (False, True, False): _ROLE_DRAINED,
5494     (False, False, True): _ROLE_OFFLINE,
5495     (False, False, False): _ROLE_REGULAR,
5496     }
5497   _R2F = dict((v, k) for k, v in _F2R.items())
5498   _FLAGS = ["master_candidate", "drained", "offline"]
5499
5500   def CheckArguments(self):
5501     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5502     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5503                 self.op.master_capable, self.op.vm_capable,
5504                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5505                 self.op.disk_state]
5506     if all_mods.count(None) == len(all_mods):
5507       raise errors.OpPrereqError("Please pass at least one modification",
5508                                  errors.ECODE_INVAL)
5509     if all_mods.count(True) > 1:
5510       raise errors.OpPrereqError("Can't set the node into more than one"
5511                                  " state at the same time",
5512                                  errors.ECODE_INVAL)
5513
5514     # Boolean value that tells us whether we might be demoting from MC
5515     self.might_demote = (self.op.master_candidate == False or
5516                          self.op.offline == True or
5517                          self.op.drained == True or
5518                          self.op.master_capable == False)
5519
5520     if self.op.secondary_ip:
5521       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5522         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5523                                    " address" % self.op.secondary_ip,
5524                                    errors.ECODE_INVAL)
5525
5526     self.lock_all = self.op.auto_promote and self.might_demote
5527     self.lock_instances = self.op.secondary_ip is not None
5528
5529   def _InstanceFilter(self, instance):
5530     """Filter for getting affected instances.
5531
5532     """
5533     return (instance.disk_template in constants.DTS_INT_MIRROR and
5534             self.op.node_name in instance.all_nodes)
5535
5536   def ExpandNames(self):
5537     if self.lock_all:
5538       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5539     else:
5540       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5541
5542     # Since modifying a node can have severe effects on currently running
5543     # operations the resource lock is at least acquired in shared mode
5544     self.needed_locks[locking.LEVEL_NODE_RES] = \
5545       self.needed_locks[locking.LEVEL_NODE]
5546
5547     # Get node resource and instance locks in shared mode; they are not used
5548     # for anything but read-only access
5549     self.share_locks[locking.LEVEL_NODE_RES] = 1
5550     self.share_locks[locking.LEVEL_INSTANCE] = 1
5551
5552     if self.lock_instances:
5553       self.needed_locks[locking.LEVEL_INSTANCE] = \
5554         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5555
5556   def BuildHooksEnv(self):
5557     """Build hooks env.
5558
5559     This runs on the master node.
5560
5561     """
5562     return {
5563       "OP_TARGET": self.op.node_name,
5564       "MASTER_CANDIDATE": str(self.op.master_candidate),
5565       "OFFLINE": str(self.op.offline),
5566       "DRAINED": str(self.op.drained),
5567       "MASTER_CAPABLE": str(self.op.master_capable),
5568       "VM_CAPABLE": str(self.op.vm_capable),
5569       }
5570
5571   def BuildHooksNodes(self):
5572     """Build hooks nodes.
5573
5574     """
5575     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5576     return (nl, nl)
5577
5578   def CheckPrereq(self):
5579     """Check prerequisites.
5580
5581     This only checks the instance list against the existing names.
5582
5583     """
5584     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5585
5586     if self.lock_instances:
5587       affected_instances = \
5588         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5589
5590       # Verify instance locks
5591       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5592       wanted_instances = frozenset(affected_instances.keys())
5593       if wanted_instances - owned_instances:
5594         raise errors.OpPrereqError("Instances affected by changing node %s's"
5595                                    " secondary IP address have changed since"
5596                                    " locks were acquired, wanted '%s', have"
5597                                    " '%s'; retry the operation" %
5598                                    (self.op.node_name,
5599                                     utils.CommaJoin(wanted_instances),
5600                                     utils.CommaJoin(owned_instances)),
5601                                    errors.ECODE_STATE)
5602     else:
5603       affected_instances = None
5604
5605     if (self.op.master_candidate is not None or
5606         self.op.drained is not None or
5607         self.op.offline is not None):
5608       # we can't change the master's node flags
5609       if self.op.node_name == self.cfg.GetMasterNode():
5610         raise errors.OpPrereqError("The master role can be changed"
5611                                    " only via master-failover",
5612                                    errors.ECODE_INVAL)
5613
5614     if self.op.master_candidate and not node.master_capable:
5615       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5616                                  " it a master candidate" % node.name,
5617                                  errors.ECODE_STATE)
5618
5619     if self.op.vm_capable == False:
5620       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5621       if ipri or isec:
5622         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5623                                    " the vm_capable flag" % node.name,
5624                                    errors.ECODE_STATE)
5625
5626     if node.master_candidate and self.might_demote and not self.lock_all:
5627       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5628       # check if after removing the current node, we're missing master
5629       # candidates
5630       (mc_remaining, mc_should, _) = \
5631           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5632       if mc_remaining < mc_should:
5633         raise errors.OpPrereqError("Not enough master candidates, please"
5634                                    " pass auto promote option to allow"
5635                                    " promotion", errors.ECODE_STATE)
5636
5637     self.old_flags = old_flags = (node.master_candidate,
5638                                   node.drained, node.offline)
5639     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5640     self.old_role = old_role = self._F2R[old_flags]
5641
5642     # Check for ineffective changes
5643     for attr in self._FLAGS:
5644       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5645         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5646         setattr(self.op, attr, None)
5647
5648     # Past this point, any flag change to False means a transition
5649     # away from the respective state, as only real changes are kept
5650
5651     # TODO: We might query the real power state if it supports OOB
5652     if _SupportsOob(self.cfg, node):
5653       if self.op.offline is False and not (node.powered or
5654                                            self.op.powered == True):
5655         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5656                                     " offline status can be reset") %
5657                                    self.op.node_name)
5658     elif self.op.powered is not None:
5659       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5660                                   " as it does not support out-of-band"
5661                                   " handling") % self.op.node_name)
5662
5663     # If we're being deofflined/drained, we'll MC ourself if needed
5664     if (self.op.drained == False or self.op.offline == False or
5665         (self.op.master_capable and not node.master_capable)):
5666       if _DecideSelfPromotion(self):
5667         self.op.master_candidate = True
5668         self.LogInfo("Auto-promoting node to master candidate")
5669
5670     # If we're no longer master capable, we'll demote ourselves from MC
5671     if self.op.master_capable == False and node.master_candidate:
5672       self.LogInfo("Demoting from master candidate")
5673       self.op.master_candidate = False
5674
5675     # Compute new role
5676     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5677     if self.op.master_candidate:
5678       new_role = self._ROLE_CANDIDATE
5679     elif self.op.drained:
5680       new_role = self._ROLE_DRAINED
5681     elif self.op.offline:
5682       new_role = self._ROLE_OFFLINE
5683     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5684       # False is still in new flags, which means we're un-setting (the
5685       # only) True flag
5686       new_role = self._ROLE_REGULAR
5687     else: # no new flags, nothing, keep old role
5688       new_role = old_role
5689
5690     self.new_role = new_role
5691
5692     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5693       # Trying to transition out of offline status
5694       # TODO: Use standard RPC runner, but make sure it works when the node is
5695       # still marked offline
5696       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5697       if result.fail_msg:
5698         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5699                                    " to report its version: %s" %
5700                                    (node.name, result.fail_msg),
5701                                    errors.ECODE_STATE)
5702       else:
5703         self.LogWarning("Transitioning node from offline to online state"
5704                         " without using re-add. Please make sure the node"
5705                         " is healthy!")
5706
5707     if self.op.secondary_ip:
5708       # Ok even without locking, because this can't be changed by any LU
5709       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5710       master_singlehomed = master.secondary_ip == master.primary_ip
5711       if master_singlehomed and self.op.secondary_ip:
5712         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5713                                    " homed cluster", errors.ECODE_INVAL)
5714
5715       assert not (frozenset(affected_instances) -
5716                   self.owned_locks(locking.LEVEL_INSTANCE))
5717
5718       if node.offline:
5719         if affected_instances:
5720           raise errors.OpPrereqError("Cannot change secondary IP address:"
5721                                      " offline node has instances (%s)"
5722                                      " configured to use it" %
5723                                      utils.CommaJoin(affected_instances.keys()))
5724       else:
5725         # On online nodes, check that no instances are running, and that
5726         # the node has the new ip and we can reach it.
5727         for instance in affected_instances.values():
5728           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5729                               msg="cannot change secondary ip")
5730
5731         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5732         if master.name != node.name:
5733           # check reachability from master secondary ip to new secondary ip
5734           if not netutils.TcpPing(self.op.secondary_ip,
5735                                   constants.DEFAULT_NODED_PORT,
5736                                   source=master.secondary_ip):
5737             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5738                                        " based ping to node daemon port",
5739                                        errors.ECODE_ENVIRON)
5740
5741     if self.op.ndparams:
5742       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5743       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5744       self.new_ndparams = new_ndparams
5745
5746     if self.op.hv_state:
5747       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5748                                                  self.node.hv_state_static)
5749
5750     if self.op.disk_state:
5751       self.new_disk_state = \
5752         _MergeAndVerifyDiskState(self.op.disk_state,
5753                                  self.node.disk_state_static)
5754
5755   def Exec(self, feedback_fn):
5756     """Modifies a node.
5757
5758     """
5759     node = self.node
5760     old_role = self.old_role
5761     new_role = self.new_role
5762
5763     result = []
5764
5765     if self.op.ndparams:
5766       node.ndparams = self.new_ndparams
5767
5768     if self.op.powered is not None:
5769       node.powered = self.op.powered
5770
5771     if self.op.hv_state:
5772       node.hv_state_static = self.new_hv_state
5773
5774     if self.op.disk_state:
5775       node.disk_state_static = self.new_disk_state
5776
5777     for attr in ["master_capable", "vm_capable"]:
5778       val = getattr(self.op, attr)
5779       if val is not None:
5780         setattr(node, attr, val)
5781         result.append((attr, str(val)))
5782
5783     if new_role != old_role:
5784       # Tell the node to demote itself, if no longer MC and not offline
5785       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5786         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5787         if msg:
5788           self.LogWarning("Node failed to demote itself: %s", msg)
5789
5790       new_flags = self._R2F[new_role]
5791       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5792         if of != nf:
5793           result.append((desc, str(nf)))
5794       (node.master_candidate, node.drained, node.offline) = new_flags
5795
5796       # we locked all nodes, we adjust the CP before updating this node
5797       if self.lock_all:
5798         _AdjustCandidatePool(self, [node.name])
5799
5800     if self.op.secondary_ip:
5801       node.secondary_ip = self.op.secondary_ip
5802       result.append(("secondary_ip", self.op.secondary_ip))
5803
5804     # this will trigger configuration file update, if needed
5805     self.cfg.Update(node, feedback_fn)
5806
5807     # this will trigger job queue propagation or cleanup if the mc
5808     # flag changed
5809     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5810       self.context.ReaddNode(node)
5811
5812     return result
5813
5814
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    targets_master = (node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Ask the node to powercycle itself.

    @return: the payload of the powercycle RPC call

    """
    hv_name = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
5845
5846
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that no locks are needed."""
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: version constants, the cluster-wide parameter sets and
        miscellaneous cluster settings, keyed by name

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, filtered just for enabled hypervisors
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": dict((hv_name, cluster.hvparams[hv_name])
                       for hv_name in enabled_hvs),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
5917
5918
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Validate the requested output fields against the known ones."""
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare that no locks are needed."""
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Compute the values of the requested configuration fields.

    @rtype: list
    @return: one value per entry of C{self.op.output_fields}, in the
        same order
    @raise errors.ParameterError: for a field name that is not handled

    """
    cfg = self.cfg
    # Each getter is only invoked when its field is actually requested
    getters = {
      "cluster_name": cfg.GetClusterName,
      "master_node": cfg.GetMasterNode,
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      "volume_group_name": cfg.GetVGName,
      }

    values = []
    for field in self.op.output_fields:
      getter = getters.get(field)
      if getter is None:
        raise errors.ParameterError(field)
      values.append(getter())
    return values
5956
5957
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later."""
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
5995
5996
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary mode) pass won't mark the operation as failed
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: C{(disks_ok, device_info)}; C{disks_ok} is False if any
      assembly that is not ignored failed, and C{device_info} is a list
      of (primary node, instance visible name, device path) tuples, the
      device path being None if the assembly on the primary node failed

  """
  primary = instance.primary_node
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  def _AssembleOn(node, node_disk, as_primary, idx):
    """Run the assemble RPC for one device on one node."""
    if ignore_size:
      # work on a copy so that the configured disk keeps its size
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return lu.rpc.call_blockdev_assemble(node, node_disk, iname,
                                         as_primary, idx)

  all_ok = True
  device_info = []

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for (idx, inst_disk) in enumerate(disks):
    for (node, node_disk) in inst_disk.ComputeNodeTree(primary):
      msg = _AssembleOn(node, node_disk, False, idx).fail_msg
      if not msg:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, msg)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for (idx, inst_disk) in enumerate(disks):
    dev_path = None

    for (node, node_disk) in inst_disk.ComputeNodeTree(primary):
      if node != primary:
        continue
      result = _AssembleOn(node, node_disk, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        all_ok = False
      else:
        dev_path = result.payload

    device_info.append((primary, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, primary)

  return (all_ok, device_info)
6083
6084
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks cannot be assembled they are shut down again and the
  operation is aborted.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly; a
      retry hint is logged only when it is false but not None
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
6098
6099
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later."""
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    With the force option the disks are shut down unconditionally,
    otherwise the instance state is checked first.

    """
    instance = self.instance
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, instance)
6134
6135
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a state check.

  The instance state is checked against C{INSTANCE_DOWN} first; only
  then is the work handed over to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down, passed on unchanged

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
6145
6146
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks, or None to select all instance disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a selected disk is not one of the
      instance's disks

  """
  if disks is None:
    return instance.disks

  # anything left over does not belong to the instance
  if set(disks).difference(instance.disks):
    raise errors.ProgrammerError("Can only act on disks belonging to the"
                                 " target instance")
  return disks
6163
6164
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Failures are always logged as warnings. A failure on the primary node
  makes the function return False unless C{ignore_primary} is true; a
  failure on any other node does so unless the RPC result is flagged as
  offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: which disks to shut down (or all, if None)
  @param ignore_primary: if true, errors on the primary node don't
      affect the return value
  @rtype: boolean
  @return: False if any failure that is not ignored occurred

  """
  success = True

  for disk in _ExpandCheckDisks(instance, disks):
    for (node, top_disk) in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      if node == instance.primary_node:
        counts_as_failure = not ignore_primary
      else:
        counts_as_failure = not result.offline
      if counts_as_failure:
        success = False

  return success
6189
6190
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for the given hypervisor's data and the reported
  C{memory_free} value is compared against the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = node_result.payload

  free_mem = hv_info.get("memory_free")
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if free_mem < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
6228
6229
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  Every volume group named in C{req_sizes} is checked in turn using
  L{_CheckNodesFreeDiskOnVG}, which raises on the first problem found.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg_name in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, req_sizes[vg_name])
6251
6252
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  All nodes are queried with a single call; the reported C{vg_free}
  value of each node is then compared against the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, [vg], None)

  for node in nodenames:
    node_result = results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = node_result.payload

    vg_free = vg_info.get("vg_free")
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)

    if vg_free < requested:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
6289
6290
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  All nodes are queried with a single call; the reported C{cpu_total}
  value of each node is then compared against the requested number.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor whose data is requested from
      the nodes
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])

  for node in nodenames:
    node_result = results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = node_result.payload

    num_cpus = hv_info.get("cpu_total")
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)

    if num_cpus < requested:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
6324
6325
class LUInstanceStartup(LogicalUnit):
  """Logical unit that starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the extra backend parameters, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return

    # upgrade the dict first, then enforce the declared parameter types
    objects.UpgradeBeParams(beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the C{FORCE} flag and is then updated with the
    generic per-instance variables.

    """
    env = dict(FORCE=self.op.force)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance; the same list is
    returned for both elements of the pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates any extra
    hypervisor parameters and the instance state and, unless an offline
    primary node is explicitly ignored, runs the node-level checks.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    hvp_override = self.op.hvparams
    if hvp_override:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvp_override, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster.FillHV(instance)
      merged_hvp.update(hvp_override)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor,
                     merged_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, pnode)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # running already, no need to look at the free memory
      return

    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MAXMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set the instance is first marked as up in the
    configuration; with an offline primary node nothing else is done.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    result = self.rpc.call_instance_start(pnode, start_args,
                                          self.op.startup_paused)
    failure = result.fail_msg
    if failure:
      # do not leave the disks active if the instance could not be started
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % failure)
6440
6441
class LUInstanceReboot(LogicalUnit):
  """Logical unit that reboots an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The reboot-specific variables are set first and then updated with the
    generic per-instance variables.

    """
    env = dict(IGNORE_SECONDARIES=self.op.ignore_secondaries,
               REBOOT_TYPE=self.op.reboot_type,
               SHUTDOWN_TIMEOUT=self.op.shutdown_timeout)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance; the same list is
    returned for both elements of the pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, its state, that the
    primary node is online and that the instance's bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted through
    a single RPC call; in every other case the instance is (if running)
    shut down and then started again together with its disks.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = instance.primary_node

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    is_running = bool(remote_info.payload)

    if is_running and reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                      constants.INSTANCE_REBOOT_HARD):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if is_running:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      failure = result.fail_msg
      if failure:
        # do not leave the disks active if the start failed
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)

    self.cfg.MarkInstanceUp(instance.name)
6534
6535
class LUInstanceShutdown(LogicalUnit):
  """Logical unit that shuts an instance down.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The generic per-instance variables plus the C{TIMEOUT} of the opcode.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance; the same list is
    returned for both elements of the pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, its state and, unless
    an offline primary node is explicitly ignored, that the primary node is
    online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(self.instance.primary_node)
    self.primary_offline = pnode_info.offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, self.instance.primary_node)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set the instance is first marked as down in the
    configuration; with an offline primary node nothing else is done. A
    failing shutdown call is only logged as a warning.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    failure = result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, instance)
6605
6606
class LUInstanceReinstall(LogicalUnit):
  """Logical unit that reinstalls the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the generic per-instance variables are exported.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance; the same list is
    returned for both elements of the pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that all its nodes are online and that it has disks. A new OS and
    new OS parameters, if requested, are verified as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is None:
      target_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      target_os = self.op.os_type

    all_nodes = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, all_nodes, target_os, new_osparams)
      self.os_inst = new_osparams # the new dict (without defaults)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    The OS is changed in the configuration first, if requested; the OS
    create scripts are then run with the instance disks activated, and the
    disks are shut down again whether or not the scripts succeeded.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
6696
6697
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  If C{self.op.nodes} is given, the disks are recreated on these nodes and
  the instance's primary node is changed accordingly; this is only allowed
  when all disks are recreated.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Normalise the disk list (unique indices, sorted).

    """
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    """Expand names and declare the needed locks.

    Besides the instance lock, the replacement nodes (if any) are declared
    at node level; the instance's own nodes are appended in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
    # Register the node resource level as well (the same way
    # LUInstanceRemove and LUInstanceMove do); the list is filled in by
    # DeclareLocks and CheckPrereq/Exec assert that these locks are owned
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the node and node resource locks.

    """
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    Only the generic per-instance variables are exported.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance; the same list is
    returned for both elements of the pair.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    @raise errors.OpPrereqError: if the number of replacement nodes does not
        match, the instance is diskless, a disk index is invalid or a
        partial recreation is combined with a change of nodes

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    # Always a real list, so that the comparison below does not depend on
    # range() itself returning a list
    all_disks = list(range(len(instance.disks)))

    if not self.op.disks:
      self.op.disks = all_disks
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != all_disks and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    When the nodes are changed, the logical IDs of DRBD8 disks are rebuilt
    for the new nodes and the configuration is updated before the disks are
    created.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
6840
6841
class LUInstanceRename(LogicalUnit):
  """Logical unit that renames an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    An IP check can only be requested together with a name check.

    """
    ip_check_only = self.op.ip_check and not self.op.name_check
    if ip_check_only:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The generic per-instance variables plus the new instance name.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance; the same list is
    returned for both elements of the pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    optionally resolves and verifies the new name and makes sure the new
    name is not used by another instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      resolved = hostname.name
      if resolved != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name, resolved)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      # from here on only the resolved name is used
      self.op.new_name = hostname.name
      new_name = self.op.new_name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    if new_name in known_instances and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are changed first; the file
    storage directory (for file-based instances) and the OS rename script
    follow.

    @return: the name of the instance as re-read from the configuration

    """
    old_inst = self.instance
    old_name = old_inst.name

    rename_file_storage = (old_inst.disk_template in
                           constants.DTS_FILEBASED and
                           self.op.new_name != old_inst.name)
    if rename_file_storage:
      old_file_storage_dir = os.path.dirname(old_inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      failure = result.fail_msg
      if failure:
        # a failing rename script is only reported, the rename stays done
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
6960
6961
class LUInstanceRemove(LogicalUnit):
  """Logical unit that removes an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock and prepare the node-level lock lists.

    """
    self._ExpandAndLockInstance()
    # both node levels start out empty and are filled in by DeclareLocks
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[level] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node and node resource locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      return

    if level == locking.LEVEL_NODE_RES:
      # Copy node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The generic per-instance variables plus the shutdown timeout.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The first list holds only the master node, the second one all nodes of
    the instance followed by the master node.

    """
    master_only = [self.cfg.GetMasterNode()]
    with_instance_nodes = list(self.instance.all_nodes) + master_only
    return (master_only, with_instance_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down first; a failing shutdown aborts the removal
    unless C{ignore_failures} is set.

    """
    instance = self.instance
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, pnode)

    result = self.rpc.call_instance_shutdown(pnode, instance,
                                             self.op.shutdown_timeout)
    failure = result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node,
                                  failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7038
7039
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  The disks are removed first, then the instance is dropped from the
  configuration and its lock is scheduled for removal.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report warnings
  @param instance: the instance to remove
  @param ignore_failures: whether a failed disk removal is only reported
      instead of aborting the operation

  """
  name = instance.name
  logging.info("Removing block devices for instance %s", name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7060
7061
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object built from the
  opcode's names, output fields and locking flag.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    query_impl = self.iq
    query_impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    query_impl = self.iq
    query_impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_result = self.iq.OldStyleQuery(self)
    return query_result
7081
7082
class LUInstanceFailover(LogicalUnit):
  """Logical unit that fails an instance over.

  The actual work is done by a L{TLMigrateInstance} tasklet created with
  C{failover=True}.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the optional C{iallocator} and C{target_node} opcode parameters
    onto the logical unit, defaulting to None.

    """
    for attr in ("iallocator", "target_node"):
      setattr(self, attr, getattr(self.op, attr, None))

  def ExpandNames(self):
    """Expand names, prepare the node locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    target = self.op.target_node
    if target is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, target)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater_args = {
      "cleanup": False,
      "failover": True,
      "ignore_consistency": self.op.ignore_consistency,
      "shutdown_timeout": self.op.shutdown_timeout,
      }
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       **migrater_args)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Compute the node locks.

    For externally mirrored disk templates either all nodes or the primary
    plus the target node are locked; otherwise the instance's nodes are.

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      wanted = locking.ALL_SET
    else:
      wanted = [instance.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = wanted
    # the list above is final, no recalculation needed
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    The failover-specific variables are set first and then updated with the
    generic per-instance variables.

    """
    instance = self._migrater.instance
    old_primary = instance.primary_node
    new_primary = self.op.target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = instance.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": new_primary,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The first list holds the master node and the instance's secondary
    nodes; the second one additionally contains the primary node.

    """
    instance = self._migrater.instance
    base_nodes = [self.cfg.GetMasterNode()]
    base_nodes.extend(instance.secondary_nodes)
    return (base_nodes, base_nodes + [instance.primary_node])
7162
7163
class LUInstanceMigrate(LogicalUnit):
  """Logical unit that migrates an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is done by a
  L{TLMigrateInstance} tasklet created with C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names, prepare the node locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    target = self.op.target_node
    if target is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, target)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 cleanup=self.op.cleanup,
                                 failover=False,
                                 fallback=self.op.allow_failover)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Compute the node locks.

    For externally mirrored disk templates either all nodes or the primary
    plus the target node are locked; otherwise the instance's nodes are.

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      wanted = locking.ALL_SET
    else:
      wanted = [instance.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = wanted
    # the list above is final, no recalculation needed
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    The generic per-instance variables are extended with the
    migration-specific ones.

    """
    instance = self._migrater.instance
    old_primary = instance.primary_node
    new_primary = self.op.target_node

    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env["OLD_PRIMARY"] = old_primary
    env["NEW_PRIMARY"] = new_primary

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # primary and secondary simply trade places
      old_secondary, new_secondary = new_primary, old_primary
    else:
      old_secondary = new_secondary = None
    env["OLD_SECONDARY"] = old_secondary
    env["NEW_SECONDARY"] = new_secondary

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The first list holds the master node and the instance's secondary
    nodes; the second one additionally contains the primary node.

    """
    instance = self._migrater.instance
    base_nodes = [self.cfg.GetMasterNode()]
    base_nodes.extend(instance.secondary_nodes)
    return (base_nodes, base_nodes + [instance.primary_node])
7235
7236
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, new disks are
  created on the target node and filled by copying the data over, the
  configuration is switched to the target node, the old disks are removed
  and the instance is started again if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the target node name and declare the initial locks.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    # DeclareLocks adds further node locks to the target node lock
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both are the
        master node, the instance's primary node and the target node

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are plain logical
    volumes or files, and that the target node is usable (online, not
    drained, VM-capable, with enough memory and the needed bridges).

    @raise errors.OpPrereqError: if one of the checks done here fails

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # Only simple disk types can be copied disk by disk
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the shutdown fails (unless
        C{ignore_consistency} is set), if the disks cannot be created or
        copied, or if the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # Always release the minors and propagate the failure
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # The data is on the target node now, so switch the configuration
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
7428
7429
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  The work is not done here directly: one migration job is submitted for
  each instance that has the node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    """Expand the node name and request a shared lock on the node.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODE: [node_name]}

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment only contains the name of the node being evacuated.

    """
    env = {"NODE_NAME": self.op.node_name}
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both contain only
        the master node

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    """Submit one migration job per primary instance of the node.

    @return: a L{ResultWithJobs} holding the single-opcode jobs

    """
    # Prepare jobs for migration instances
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = opcodes.OpInstanceMigrate(instance_name=inst.name,
                                             mode=self.op.mode,
                                             live=self.op.live,
                                             iallocator=self.op.iallocator,
                                             target_node=self.op.target_node)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert (owned_nodes == frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
7489
7490
7491 class TLMigrateInstance(Tasklet):
7492   """Tasklet class for instance migration.
7493
7494   @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
7497   @type cleanup: boolean
  @ivar cleanup: Whether we clean up after a failed migration
7499   @type iallocator: string
7500   @ivar iallocator: The iallocator used to determine target_node
7501   @type target_node: string
7502   @ivar target_node: If given, the target_node to reallocate the instance to
7503   @type failover: boolean
7504   @ivar failover: Whether operation results in failover or migration
7505   @type fallback: boolean
7506   @ivar fallback: Whether fallback to failover is allowed if migration not
7507                   possible
7508   @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
7511   @type shutdown_timeout: int
7512   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7513
7514   """
7515
7516   # Constants
7517   _MIGRATION_POLL_INTERVAL = 1      # seconds
7518   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7519
7520   def __init__(self, lu, instance_name, cleanup=False,
7521                failover=False, fallback=False,
7522                ignore_consistency=False,
7523                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7524     """Initializes this class.
7525
7526     """
7527     Tasklet.__init__(self, lu)
7528
7529     # Parameters
7530     self.instance_name = instance_name
7531     self.cleanup = cleanup
7532     self.live = False # will be overridden later
7533     self.failover = failover
7534     self.fallback = fallback
7535     self.ignore_consistency = ignore_consistency
7536     self.shutdown_timeout = shutdown_timeout
7537
7538   def CheckPrereq(self):
7539     """Check prerequisites.
7540
7541     This checks that the instance is in the cluster.
7542
7543     """
7544     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7545     instance = self.cfg.GetInstanceInfo(instance_name)
7546     assert instance is not None
7547     self.instance = instance
7548
7549     if (not self.cleanup and
7550         not instance.admin_state == constants.ADMINST_UP and
7551         not self.failover and self.fallback):
7552       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7553                       " switching to failover")
7554       self.failover = True
7555
7556     if instance.disk_template not in constants.DTS_MIRRORED:
7557       if self.failover:
7558         text = "failovers"
7559       else:
7560         text = "migrations"
7561       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7562                                  " %s" % (instance.disk_template, text),
7563                                  errors.ECODE_STATE)
7564
7565     if instance.disk_template in constants.DTS_EXT_MIRROR:
7566       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7567
7568       if self.lu.op.iallocator:
7569         self._RunAllocator()
7570       else:
7571         # We set set self.target_node as it is required by
7572         # BuildHooksEnv
7573         self.target_node = self.lu.op.target_node
7574
7575       # self.target_node is already populated, either directly or by the
7576       # iallocator run
7577       target_node = self.target_node
7578       if self.target_node == instance.primary_node:
7579         raise errors.OpPrereqError("Cannot migrate instance %s"
7580                                    " to its primary (%s)" %
7581                                    (instance.name, instance.primary_node))
7582
7583       if len(self.lu.tasklets) == 1:
7584         # It is safe to release locks only when we're the only tasklet
7585         # in the LU
7586         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7587                       keep=[instance.primary_node, self.target_node])
7588
7589     else:
7590       secondary_nodes = instance.secondary_nodes
7591       if not secondary_nodes:
7592         raise errors.ConfigurationError("No secondary node but using"
7593                                         " %s disk template" %
7594                                         instance.disk_template)
7595       target_node = secondary_nodes[0]
7596       if self.lu.op.iallocator or (self.lu.op.target_node and
7597                                    self.lu.op.target_node != target_node):
7598         if self.failover:
7599           text = "failed over"
7600         else:
7601           text = "migrated"
7602         raise errors.OpPrereqError("Instances with disk template %s cannot"
7603                                    " be %s to arbitrary nodes"
7604                                    " (neither an iallocator nor a target"
7605                                    " node can be passed)" %
7606                                    (instance.disk_template, text),
7607                                    errors.ECODE_INVAL)
7608
7609     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7610
7611     # check memory requirements on the secondary node
7612     if not self.failover or instance.admin_state == constants.ADMINST_UP:
7613       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7614                            instance.name, i_be[constants.BE_MAXMEM],
7615                            instance.hypervisor)
7616     else:
7617       self.lu.LogInfo("Not checking memory on the secondary node as"
7618                       " instance will not be started")
7619
7620     # check if failover must be forced instead of migration
7621     if (not self.cleanup and not self.failover and
7622         i_be[constants.BE_ALWAYS_FAILOVER]):
7623       if self.fallback:
7624         self.lu.LogInfo("Instance configured to always failover; fallback"
7625                         " to failover")
7626         self.failover = True
7627       else:
7628         raise errors.OpPrereqError("This instance has been configured to"
7629                                    " always failover, please allow failover",
7630                                    errors.ECODE_STATE)
7631
7632     # check bridge existance
7633     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7634
7635     if not self.cleanup:
7636       _CheckNodeNotDrained(self.lu, target_node)
7637       if not self.failover:
7638         result = self.rpc.call_instance_migratable(instance.primary_node,
7639                                                    instance)
7640         if result.fail_msg and self.fallback:
7641           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7642                           " failover")
7643           self.failover = True
7644         else:
7645           result.Raise("Can't migrate, please use failover",
7646                        prereq=True, ecode=errors.ECODE_STATE)
7647
7648     assert not (self.failover and self.cleanup)
7649
7650     if not self.failover:
7651       if self.lu.op.live is not None and self.lu.op.mode is not None:
7652         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7653                                    " parameters are accepted",
7654                                    errors.ECODE_INVAL)
7655       if self.lu.op.live is not None:
7656         if self.lu.op.live:
7657           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7658         else:
7659           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7660         # reset the 'live' parameter to None so that repeated
7661         # invocations of CheckPrereq do not raise an exception
7662         self.lu.op.live = None
7663       elif self.lu.op.mode is None:
7664         # read the default value from the hypervisor
7665         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7666                                                 skip_globals=False)
7667         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7668
7669       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7670     else:
7671       # Failover is never live
7672       self.live = False
7673
7674   def _RunAllocator(self):
7675     """Run the allocator based on input opcode.
7676
7677     """
7678     ial = IAllocator(self.cfg, self.rpc,
7679                      mode=constants.IALLOCATOR_MODE_RELOC,
7680                      name=self.instance_name,
7681                      # TODO See why hail breaks with a single node below
7682                      relocate_from=[self.instance.primary_node,
7683                                     self.instance.primary_node],
7684                      )
7685
7686     ial.Run(self.lu.op.iallocator)
7687
7688     if not ial.success:
7689       raise errors.OpPrereqError("Can't compute nodes using"
7690                                  " iallocator '%s': %s" %
7691                                  (self.lu.op.iallocator, ial.info),
7692                                  errors.ECODE_NORES)
7693     if len(ial.result) != ial.required_nodes:
7694       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7695                                  " of nodes (%s), required %s" %
7696                                  (self.lu.op.iallocator, len(ial.result),
7697                                   ial.required_nodes), errors.ECODE_FAULT)
7698     self.target_node = ial.result[0]
7699     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7700                  self.instance_name, self.lu.op.iallocator,
7701                  utils.CommaJoin(ial.result))
7702
7703   def _WaitUntilSync(self):
7704     """Poll with custom rpc for disk sync.
7705
7706     This uses our own step-based rpc call.
7707
7708     """
7709     self.feedback_fn("* wait until resync is done")
7710     all_done = False
7711     while not all_done:
7712       all_done = True
7713       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7714                                             self.nodes_ip,
7715                                             self.instance.disks)
7716       min_percent = 100
7717       for node, nres in result.items():
7718         nres.Raise("Cannot resync disks on node %s" % node)
7719         node_done, node_percent = nres.payload
7720         all_done = all_done and node_done
7721         if node_percent is not None:
7722           min_percent = min(min_percent, node_percent)
7723       if not all_done:
7724         if min_percent < 100:
7725           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7726         time.sleep(2)
7727
7728   def _EnsureSecondary(self, node):
7729     """Demote a node to secondary.
7730
7731     """
7732     self.feedback_fn("* switching node %s to secondary mode" % node)
7733
7734     for dev in self.instance.disks:
7735       self.cfg.SetDiskID(dev, node)
7736
7737     result = self.rpc.call_blockdev_close(node, self.instance.name,
7738                                           self.instance.disks)
7739     result.Raise("Cannot change disk to secondary on node %s" % node)
7740
7741   def _GoStandalone(self):
7742     """Disconnect from the network.
7743
7744     """
7745     self.feedback_fn("* changing into standalone mode")
7746     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7747                                                self.instance.disks)
7748     for node, nres in result.items():
7749       nres.Raise("Cannot disconnect disks node %s" % node)
7750
7751   def _GoReconnect(self, multimaster):
7752     """Reconnect to the network.
7753
7754     """
7755     if multimaster:
7756       msg = "dual-master"
7757     else:
7758       msg = "single-master"
7759     self.feedback_fn("* changing disks into %s mode" % msg)
7760     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7761                                            self.instance.disks,
7762                                            self.instance.name, multimaster)
7763     for node, nres in result.items():
7764       nres.Raise("Cannot change disks config on node %s" % node)
7765
7766   def _ExecCleanup(self):
7767     """Try to cleanup after a failed migration.
7768
7769     The cleanup is done by:
7770       - check that the instance is running only on one node
7771         (and update the config if needed)
7772       - change disks on its secondary node to secondary
7773       - wait until disks are fully synchronized
7774       - disconnect from the network
7775       - change disks into single-master mode
7776       - wait again until disks are fully synchronized
7777
7778     """
7779     instance = self.instance
7780     target_node = self.target_node
7781     source_node = self.source_node
7782
7783     # check running on only one node
7784     self.feedback_fn("* checking where the instance actually runs"
7785                      " (if this hangs, the hypervisor might be in"
7786                      " a bad state)")
7787     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7788     for node, result in ins_l.items():
7789       result.Raise("Can't contact node %s" % node)
7790
7791     runningon_source = instance.name in ins_l[source_node].payload
7792     runningon_target = instance.name in ins_l[target_node].payload
7793
7794     if runningon_source and runningon_target:
7795       raise errors.OpExecError("Instance seems to be running on two nodes,"
7796                                " or the hypervisor is confused; you will have"
7797                                " to ensure manually that it runs only on one"
7798                                " and restart this operation")
7799
7800     if not (runningon_source or runningon_target):
7801       raise errors.OpExecError("Instance does not seem to be running at all;"
7802                                " in this case it's safer to repair by"
7803                                " running 'gnt-instance stop' to ensure disk"
7804                                " shutdown, and then restarting it")
7805
7806     if runningon_target:
7807       # the migration has actually succeeded, we need to update the config
7808       self.feedback_fn("* instance running on secondary node (%s),"
7809                        " updating config" % target_node)
7810       instance.primary_node = target_node
7811       self.cfg.Update(instance, self.feedback_fn)
7812       demoted_node = source_node
7813     else:
7814       self.feedback_fn("* instance confirmed to be running on its"
7815                        " primary node (%s)" % source_node)
7816       demoted_node = target_node
7817
7818     if instance.disk_template in constants.DTS_INT_MIRROR:
7819       self._EnsureSecondary(demoted_node)
7820       try:
7821         self._WaitUntilSync()
7822       except errors.OpExecError:
7823         # we ignore here errors, since if the device is standalone, it
7824         # won't be able to sync
7825         pass
7826       self._GoStandalone()
7827       self._GoReconnect(False)
7828       self._WaitUntilSync()
7829
7830     self.feedback_fn("* done")
7831
7832   def _RevertDiskStatus(self):
7833     """Try to revert the disk status after a failed migration.
7834
7835     """
7836     target_node = self.target_node
7837     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7838       return
7839
7840     try:
7841       self._EnsureSecondary(target_node)
7842       self._GoStandalone()
7843       self._GoReconnect(False)
7844       self._WaitUntilSync()
7845     except errors.OpExecError, err:
7846       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7847                          " please try to recover the instance manually;"
7848                          " error '%s'" % str(err))
7849
7850   def _AbortMigration(self):
7851     """Call the hypervisor code to abort a started migration.
7852
7853     """
7854     instance = self.instance
7855     target_node = self.target_node
7856     source_node = self.source_node
7857     migration_info = self.migration_info
7858
7859     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7860                                                                  instance,
7861                                                                  migration_info,
7862                                                                  False)
7863     abort_msg = abort_result.fail_msg
7864     if abort_msg:
7865       logging.error("Aborting migration failed on target node %s: %s",
7866                     target_node, abort_msg)
7867       # Don't raise an exception here, as we stil have to try to revert the
7868       # disk status, even if this step failed.
7869
7870     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7871         instance, False, self.live)
7872     abort_msg = abort_result.fail_msg
7873     if abort_msg:
7874       logging.error("Aborting migration failed on source node %s: %s",
7875                     source_node, abort_msg)
7876
7877   def _ExecMigration(self):
7878     """Migrate an instance.
7879
7880     The migrate is done by:
7881       - change the disks into dual-master mode
7882       - wait until disks are fully synchronized again
7883       - migrate the instance
7884       - change disks on the new secondary node (the old primary) to secondary
7885       - wait until disks are fully synchronized
7886       - change disks into single-master mode
7887
7888     """
7889     instance = self.instance
7890     target_node = self.target_node
7891     source_node = self.source_node
7892
7893     # Check for hypervisor version mismatch and warn the user.
7894     nodeinfo = self.rpc.call_node_info([source_node, target_node],
7895                                        None, [self.instance.hypervisor])
7896     for ninfo in nodeinfo.values():
7897       ninfo.Raise("Unable to retrieve node information from node '%s'" %
7898                   ninfo.node)
7899     (_, _, (src_info, )) = nodeinfo[source_node].payload
7900     (_, _, (dst_info, )) = nodeinfo[target_node].payload
7901
7902     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7903         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7904       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7905       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7906       if src_version != dst_version:
7907         self.feedback_fn("* warning: hypervisor version mismatch between"
7908                          " source (%s) and target (%s) node" %
7909                          (src_version, dst_version))
7910
7911     self.feedback_fn("* checking disk consistency between source and target")
7912     for dev in instance.disks:
7913       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7914         raise errors.OpExecError("Disk %s is degraded or not fully"
7915                                  " synchronized on target node,"
7916                                  " aborting migration" % dev.iv_name)
7917
7918     # First get the migration information from the remote node
7919     result = self.rpc.call_migration_info(source_node, instance)
7920     msg = result.fail_msg
7921     if msg:
7922       log_err = ("Failed fetching source migration information from %s: %s" %
7923                  (source_node, msg))
7924       logging.error(log_err)
7925       raise errors.OpExecError(log_err)
7926
7927     self.migration_info = migration_info = result.payload
7928
7929     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7930       # Then switch the disks to master/master mode
7931       self._EnsureSecondary(target_node)
7932       self._GoStandalone()
7933       self._GoReconnect(True)
7934       self._WaitUntilSync()
7935
7936     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7937     result = self.rpc.call_accept_instance(target_node,
7938                                            instance,
7939                                            migration_info,
7940                                            self.nodes_ip[target_node])
7941
7942     msg = result.fail_msg
7943     if msg:
7944       logging.error("Instance pre-migration failed, trying to revert"
7945                     " disk status: %s", msg)
7946       self.feedback_fn("Pre-migration failed, aborting")
7947       self._AbortMigration()
7948       self._RevertDiskStatus()
7949       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7950                                (instance.name, msg))
7951
7952     self.feedback_fn("* migrating instance to %s" % target_node)
7953     result = self.rpc.call_instance_migrate(source_node, instance,
7954                                             self.nodes_ip[target_node],
7955                                             self.live)
7956     msg = result.fail_msg
7957     if msg:
7958       logging.error("Instance migration failed, trying to revert"
7959                     " disk status: %s", msg)
7960       self.feedback_fn("Migration failed, aborting")
7961       self._AbortMigration()
7962       self._RevertDiskStatus()
7963       raise errors.OpExecError("Could not migrate instance %s: %s" %
7964                                (instance.name, msg))
7965
7966     self.feedback_fn("* starting memory transfer")
7967     last_feedback = time.time()
7968     while True:
7969       result = self.rpc.call_instance_get_migration_status(source_node,
7970                                                            instance)
7971       msg = result.fail_msg
7972       ms = result.payload   # MigrationStatus instance
7973       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7974         logging.error("Instance migration failed, trying to revert"
7975                       " disk status: %s", msg)
7976         self.feedback_fn("Migration failed, aborting")
7977         self._AbortMigration()
7978         self._RevertDiskStatus()
7979         raise errors.OpExecError("Could not migrate instance %s: %s" %
7980                                  (instance.name, msg))
7981
7982       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7983         self.feedback_fn("* memory transfer complete")
7984         break
7985
7986       if (utils.TimeoutExpired(last_feedback,
7987                                self._MIGRATION_FEEDBACK_INTERVAL) and
7988           ms.transferred_ram is not None):
7989         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7990         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7991         last_feedback = time.time()
7992
7993       time.sleep(self._MIGRATION_POLL_INTERVAL)
7994
7995     result = self.rpc.call_instance_finalize_migration_src(source_node,
7996                                                            instance,
7997                                                            True,
7998                                                            self.live)
7999     msg = result.fail_msg
8000     if msg:
8001       logging.error("Instance migration succeeded, but finalization failed"
8002                     " on the source node: %s", msg)
8003       raise errors.OpExecError("Could not finalize instance migration: %s" %
8004                                msg)
8005
8006     instance.primary_node = target_node
8007
8008     # distribute new instance config to the other nodes
8009     self.cfg.Update(instance, self.feedback_fn)
8010
8011     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8012                                                            instance,
8013                                                            migration_info,
8014                                                            True)
8015     msg = result.fail_msg
8016     if msg:
8017       logging.error("Instance migration succeeded, but finalization failed"
8018                     " on the target node: %s", msg)
8019       raise errors.OpExecError("Could not finalize instance migration: %s" %
8020                                msg)
8021
8022     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8023       self._EnsureSecondary(source_node)
8024       self._WaitUntilSync()
8025       self._GoStandalone()
8026       self._GoReconnect(False)
8027       self._WaitUntilSync()
8028
8029     self.feedback_fn("* done")
8030
8031   def _ExecFailover(self):
8032     """Failover an instance.
8033
8034     The failover is done by shutting it down on its present node and
8035     starting it on the secondary.
8036
8037     """
8038     instance = self.instance
8039     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8040
8041     source_node = instance.primary_node
8042     target_node = self.target_node
8043
8044     if instance.admin_state == constants.ADMINST_UP:
8045       self.feedback_fn("* checking disk consistency between source and target")
8046       for dev in instance.disks:
8047         # for drbd, these are drbd over lvm
8048         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8049           if primary_node.offline:
8050             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8051                              " target node %s" %
8052                              (primary_node.name, dev.iv_name, target_node))
8053           elif not self.ignore_consistency:
8054             raise errors.OpExecError("Disk %s is degraded on target node,"
8055                                      " aborting failover" % dev.iv_name)
8056     else:
8057       self.feedback_fn("* not checking disk consistency as instance is not"
8058                        " running")
8059
8060     self.feedback_fn("* shutting down instance on source node")
8061     logging.info("Shutting down instance %s on node %s",
8062                  instance.name, source_node)
8063
8064     result = self.rpc.call_instance_shutdown(source_node, instance,
8065                                              self.shutdown_timeout)
8066     msg = result.fail_msg
8067     if msg:
8068       if self.ignore_consistency or primary_node.offline:
8069         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8070                            " proceeding anyway; please make sure node"
8071                            " %s is down; error details: %s",
8072                            instance.name, source_node, source_node, msg)
8073       else:
8074         raise errors.OpExecError("Could not shutdown instance %s on"
8075                                  " node %s: %s" %
8076                                  (instance.name, source_node, msg))
8077
8078     self.feedback_fn("* deactivating the instance's disks on source node")
8079     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8080       raise errors.OpExecError("Can't shut down the instance's disks")
8081
8082     instance.primary_node = target_node
8083     # distribute new instance config to the other nodes
8084     self.cfg.Update(instance, self.feedback_fn)
8085
8086     # Only start the instance if it's marked as up
8087     if instance.admin_state == constants.ADMINST_UP:
8088       self.feedback_fn("* activating the instance's disks on target node %s" %
8089                        target_node)
8090       logging.info("Starting instance %s on node %s",
8091                    instance.name, target_node)
8092
8093       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8094                                            ignore_secondaries=True)
8095       if not disks_ok:
8096         _ShutdownInstanceDisks(self.lu, instance)
8097         raise errors.OpExecError("Can't activate the instance's disks")
8098
8099       self.feedback_fn("* starting the instance on the target node %s" %
8100                        target_node)
8101       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8102                                             False)
8103       msg = result.fail_msg
8104       if msg:
8105         _ShutdownInstanceDisks(self.lu, instance)
8106         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8107                                  (instance.name, target_node, msg))
8108
8109   def Exec(self, feedback_fn):
8110     """Perform the migration.
8111
8112     """
8113     self.feedback_fn = feedback_fn
8114     self.source_node = self.instance.primary_node
8115
8116     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8117     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8118       self.target_node = self.instance.secondary_nodes[0]
8119       # Otherwise self.target_node has been populated either
8120       # directly, or through an iallocator.
8121
8122     self.all_nodes = [self.source_node, self.target_node]
8123     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8124                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8125
8126     if self.failover:
8127       feedback_fn("Failover instance %s" % self.instance.name)
8128       self._ExecFailover()
8129     else:
8130       feedback_fn("Migrating instance %s" % self.instance.name)
8131
8132       if self.cleanup:
8133         return self._ExecCleanup()
8134       else:
8135         return self._ExecMigration()
8136
8137
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are handled first (recursively), then the
  device itself is created if creation is forced. Creation becomes
  forced, for this device and everything below it, as soon as a device
  reports that it has to be created on secondaries.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns a true value
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  force_create = True if device.CreateOnSecondary() else force_create

  for child in (device.children or ()):
    _CreateBlockDev(lu, node, instance, child, force_create,
                    info, force_open)

  if force_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8178
8179
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  Children of the device are not touched here, so they must already
  exist when this function is called.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  owner = instance.name
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         owner, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, owner))
  creation.Raise(failure_text)
  # Only fill in the physical ID if the device does not have one yet
  if device.physical_id is None:
    device.physical_id = creation.payload
8208
8209
def _GenerateUniqueNames(lu, exts):
  """Generate unique names, one for each given suffix.

  A fresh unique ID is requested from the configuration for every
  entry of C{exts} and the entry is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append to the generated IDs
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
8221
8222
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters;
      dict(template_name -> parameters)
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.
  @raise errors.ProgrammerError: if the disk template is not known

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  def _fill(ld_type, overrides):
    """Overlay the given values on the defaults of a logical disk type.

    """
    return objects.FillDict(constants.DISK_LD_DEFAULTS[ld_type], overrides)

  template_params = disk_params[disk_template]

  if disk_template == constants.DT_DRBD8:
    # Order matters: the DRBD device first, then its data and metadata LVs
    drbd_ld = _fill(constants.LD_DRBD8, {
      constants.LDP_RESYNC_RATE: template_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: template_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH:
        template_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG:
        template_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: template_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: template_params[constants.DRBD_NET_CUSTOM],
      })
    data_ld = _fill(constants.LD_LV, {
      constants.LDP_STRIPES: template_params[constants.DRBD_DATA_STRIPES],
      })
    meta_ld = _fill(constants.LD_LV, {
      constants.LDP_STRIPES: template_params[constants.DRBD_META_STRIPES],
      })
    return [drbd_ld, data_ld, meta_ld]

  if disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
    # The defaults are returned as they are, not as a copy
    return [constants.DISK_LD_DEFAULTS[constants.LD_FILE]]

  if disk_template == constants.DT_PLAIN:
    return [_fill(constants.LD_LV, {
      constants.LDP_STRIPES: template_params[constants.LV_STRIPES],
      })]

  if disk_template == constants.DT_BLOCK:
    # The defaults are returned as they are, not as a copy
    return [constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV]]

  # Any other known template has no logical disk parameters
  return []
8291
8292
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Generate a drbd8 device complete with its children.

  A port and a shared secret are obtained from the configuration, then
  the data LV, the metadata LV and the DRBD8 device on top of them are
  built.

  @param lu: the lu on whose behalf we execute
  @param primary: the primary node, first element of the logical ID
  @param secondary: the secondary node, second element of the logical ID
  @param size: the size of the data LV and of the DRBD8 device
  @param vgnames: two-element sequence with the volume groups of the
      data and of the metadata LV
  @param names: two-element sequence with the names of the data and of
      the metadata LV
  @param iv_name: the C{iv_name} of the DRBD8 device
  @param p_minor: the minor stored for the primary node
  @param s_minor: the minor stored for the secondary node
  @param drbd_params: parameters of the DRBD8 device
  @param data_params: parameters of the data LV
  @param meta_params: parameters of the metadata LV
  @rtype: L{objects.Disk}
  @return: the DRBD8 device, with the two LVs as its children

  """
  assert len(vgnames) == len(names) == 2
  tcp_port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params=data_params)
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params=meta_params)
  drbd_id = (primary, secondary, tcp_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params=drbd_params)
8316
8317
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn, disk_params):
  """Generate the entire disk layout for a given template type.

  Builds one L{objects.Disk} per entry of C{disk_info}; nothing is
  created on the nodes by this function.

  @param lu: the lu on whose behalf we execute; its configuration is
      used for the default volume group, the unique names and the DRBD
      minors
  @param template_name: the disk template, one of the C{constants.DT_*}
      values handled below
  @param instance_name: the instance name, passed on when allocating
      DRBD minors
  @param primary_node: the primary node (only used for DRBD8)
  @param secondary_nodes: list of secondary nodes; must contain exactly
      one node for DRBD8 and be empty for the other templates
  @type disk_info: list of dicts
  @param disk_info: one dict per disk; C{IDISK_SIZE} and C{IDISK_MODE}
      are read for every disk, C{IDISK_VG}/C{IDISK_METAVG} are optional
      and C{IDISK_ADOPT} is read for the block template
  @param file_storage_dir: directory used in the logical ID of
      file-based disks
  @param file_driver: driver used in the logical ID of file-based disks
  @param base_index: offset added to the position in C{disk_info} to
      obtain the disk index used in names
  @param feedback_fn: function used to report progress (only called for
      the plain template)
  @param disk_params: disk template parameters, handed to
      L{_ComputeLDParams}
  @rtype: list of L{objects.Disk}
  @return: the generated disks (empty for the diskless template)
  @raise errors.ProgrammerError: on an unsupported template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  # Computed for every template, so an unknown template is already
  # rejected here
  ld_params = _ComputeLDParams(template_name, disk_params)
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # The per-disk volume group overrides the cluster-wide one
      vg = disk.get(constants.IDISK_VG, vgname)
      # Note: the feedback uses the position in disk_info, not disk_index
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk are requested, in (primary, secondary) order
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # Two LV names per disk: "<prefix>_data" followed by "<prefix>_meta"
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      # Entries 2*idx and 2*idx+1 of names/minors belong to this disk
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # The logical ID is (driver, "<file_storage_dir>/disk<index>")
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    # Same layout as for DT_FILE; only the storage requirement check differs
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # The IDISK_ADOPT value of the disk becomes part of the logical ID
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
8430
8431
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance owning the disks; only its name is used
  @rtype: string
  @return: the instance name prefixed with C{originstname+}

  """
  owner_name = instance.name
  return "originstname+%s" % owner_name
8437
8438
def _CalcEta(time_taken, written, total_size):
  """Estimate the remaining time from the progress made so far.

  The average time per written unit is extrapolated to the amount that
  is still left to write.

  @param time_taken: The time taken so far
  @param written: amount written so far (must not be zero)
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  remaining = total_size - written
  return remaining * (time_taken / float(written))
8450
8451
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  The synchronisation of the disks is paused on the primary node, every
  disk is wiped chunk by chunk and the synchronisation is resumed again,
  even if the wipe fails.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: nothing; a failure is reported by the exception raised by
      the C{Raise} method of the RPC result

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # Check the RPC itself before looking at the per-disk results
  result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("pause-sync of instance %s for disks %d failed",
                      instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; for small disks the truncation would yield 0, with which
      # the loop below could never advance, hence the lower bound
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        # The last chunk may be smaller than the regular chunk size
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # Report the progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    if result.fail_msg:
      # Only warn here: raising from the cleanup path would hide the
      # error of a failed wipe
      lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
                    " please have a look at the status and"
                    " troubleshoot the issue: %s", node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warning("resume-sync of instance %s for disks %d failed",
                          instance.name, idx)
8519
8520
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: nothing; a failure to create the storage directory is
      reported through the C{Raise} method of the RPC result

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [target_node]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    # The directory of the first disk is created on the (overridden)
    # primary node
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    lu.rpc.call_file_storage_dir_create(pnode, storage_dir).Raise(
      "Failed to create directory '%s' on"
      " node %s" % (storage_dir, pnode))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # Creation and opening are only forced on the primary node
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
8564
8565
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # Best effort: remember the failure and go on with the rest
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  # NOTE(review): _CreateDisks creates the storage directory for every
  # template in constants.DTS_FILEBASED, while only DT_FILE is cleaned up
  # here; confirm whether this asymmetry is intended
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the RPC was actually sent to, which is not the
      # primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
8618
8619
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list of dicts
  @param disks: the disk definitions; C{IDISK_VG} and C{IDISK_SIZE} are
      read from each of them
  @rtype: dict
  @return: volume group name -> size required in that volume group;
      empty for templates which do not use volume groups
  @raise errors.ProgrammerError: if the requirements of the disk
      template are not known

  """
  def _compute(disks, payload):
    """Sum up, per volume group, the disk sizes plus a per-disk overhead.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # Accumulate under the name of the volume group, so that several
      # disks in the same volume group add up
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # DRBD_META_SIZE is added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    # Known to _ComputeDiskSize as well; needs no volume group space
    constants.DT_BLOCK: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8650
8651
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size required by a disk template.

  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list of dicts
  @param disks: the disk definitions; C{IDISK_SIZE} is read from each
  @return: the sum of the disk sizes for plain, the same plus
      DRBD_META_SIZE per disk for drbd8, C{0} for shared file and block
      and C{None} for diskless and file
  @raise errors.ProgrammerError: if the requirements of the disk
      template are not known

  """
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # DRBD_META_SIZE is added for drbd metadata for each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
8673
8674
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes, in their original order

  """
  non_vm_capable = frozenset(lu.cfg.GetNonVmCapableNodeList())
  kept = []
  for name in nodenames:
    if name in non_vm_capable:
      continue
    kept.append(name)
  return kept
8688
8689
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  The given parameters are laid over the cluster-level parameters of
  the hypervisor and the result is validated on all vm-capable nodes
  of the list; offline nodes are skipped. This is used by both
  instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_defaults = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  full_params = objects.FillDict(cluster_defaults, hvparams)

  per_node = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                    full_params)
  for node in vm_nodes:
    node_result = per_node[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
8718
8719
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validates OS parameters on a set of nodes.

  Only the vm_capable nodes among C{nodenames} are asked to validate the
  parameters. A node returning an empty payload is only logged (the OS
  was not found there and the validation was skipped).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters are checked
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  results = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                    osparams)

  for node_name, node_result in results.items():
    # offline nodes are not special-cased, since this is meant to be run
    # only against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" %
                      node_name)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node_name)
8748
8749
8750 class LUInstanceCreate(LogicalUnit):
8751   """Create an instance.
8752
8753   """
8754   HPATH = "instance-add"
8755   HTYPE = constants.HTYPE_INSTANCE
8756   REQ_BGL = False
8757
8758   def CheckArguments(self):
8759     """Check arguments.
8760
8761     """
8762     # do not require name_check to ease forward/backward compatibility
8763     # for tools
8764     if self.op.no_install and self.op.start:
8765       self.LogInfo("No-installation mode selected, disabling startup")
8766       self.op.start = False
8767     # validate/normalize the instance name
8768     self.op.instance_name = \
8769       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8770
8771     if self.op.ip_check and not self.op.name_check:
8772       # TODO: make the ip check more flexible and not depend on the name check
8773       raise errors.OpPrereqError("Cannot do IP address check without a name"
8774                                  " check", errors.ECODE_INVAL)
8775
8776     # check nics' parameter names
8777     for nic in self.op.nics:
8778       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8779
8780     # check disks. parameter names and consistent adopt/no-adopt strategy
8781     has_adopt = has_no_adopt = False
8782     for disk in self.op.disks:
8783       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8784       if constants.IDISK_ADOPT in disk:
8785         has_adopt = True
8786       else:
8787         has_no_adopt = True
8788     if has_adopt and has_no_adopt:
8789       raise errors.OpPrereqError("Either all disks are adopted or none is",
8790                                  errors.ECODE_INVAL)
8791     if has_adopt:
8792       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8793         raise errors.OpPrereqError("Disk adoption is not supported for the"
8794                                    " '%s' disk template" %
8795                                    self.op.disk_template,
8796                                    errors.ECODE_INVAL)
8797       if self.op.iallocator is not None:
8798         raise errors.OpPrereqError("Disk adoption not allowed with an"
8799                                    " iallocator script", errors.ECODE_INVAL)
8800       if self.op.mode == constants.INSTANCE_IMPORT:
8801         raise errors.OpPrereqError("Disk adoption not allowed for"
8802                                    " instance import", errors.ECODE_INVAL)
8803     else:
8804       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8805         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8806                                    " but no 'adopt' parameter given" %
8807                                    self.op.disk_template,
8808                                    errors.ECODE_INVAL)
8809
8810     self.adopt_disks = has_adopt
8811
8812     # instance name verification
8813     if self.op.name_check:
8814       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8815       self.op.instance_name = self.hostname1.name
8816       # used in CheckPrereq for ip ping check
8817       self.check_ip = self.hostname1.ip
8818     else:
8819       self.check_ip = None
8820
8821     # file storage checks
8822     if (self.op.file_driver and
8823         not self.op.file_driver in constants.FILE_DRIVER):
8824       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8825                                  self.op.file_driver, errors.ECODE_INVAL)
8826
8827     if self.op.disk_template == constants.DT_FILE:
8828       opcodes.RequireFileStorage()
8829     elif self.op.disk_template == constants.DT_SHARED_FILE:
8830       opcodes.RequireSharedFileStorage()
8831
8832     ### Node/iallocator related checks
8833     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8834
8835     if self.op.pnode is not None:
8836       if self.op.disk_template in constants.DTS_INT_MIRROR:
8837         if self.op.snode is None:
8838           raise errors.OpPrereqError("The networked disk templates need"
8839                                      " a mirror node", errors.ECODE_INVAL)
8840       elif self.op.snode:
8841         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8842                         " template")
8843         self.op.snode = None
8844
8845     self._cds = _GetClusterDomainSecret()
8846
8847     if self.op.mode == constants.INSTANCE_IMPORT:
8848       # On import force_variant must be True, because if we forced it at
8849       # initial install, our only chance when importing it back is that it
8850       # works again!
8851       self.op.force_variant = True
8852
8853       if self.op.no_install:
8854         self.LogInfo("No-installation mode has no effect during import")
8855
8856     elif self.op.mode == constants.INSTANCE_CREATE:
8857       if self.op.os_type is None:
8858         raise errors.OpPrereqError("No guest OS specified",
8859                                    errors.ECODE_INVAL)
8860       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8861         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8862                                    " installation" % self.op.os_type,
8863                                    errors.ECODE_STATE)
8864       if self.op.disk_template is None:
8865         raise errors.OpPrereqError("No disk template specified",
8866                                    errors.ECODE_INVAL)
8867
8868     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8869       # Check handshake to ensure both clusters have the same domain secret
8870       src_handshake = self.op.source_handshake
8871       if not src_handshake:
8872         raise errors.OpPrereqError("Missing source handshake",
8873                                    errors.ECODE_INVAL)
8874
8875       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8876                                                            src_handshake)
8877       if errmsg:
8878         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8879                                    errors.ECODE_INVAL)
8880
8881       # Load and check source CA
8882       self.source_x509_ca_pem = self.op.source_x509_ca
8883       if not self.source_x509_ca_pem:
8884         raise errors.OpPrereqError("Missing source X509 CA",
8885                                    errors.ECODE_INVAL)
8886
8887       try:
8888         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8889                                                     self._cds)
8890       except OpenSSL.crypto.Error, err:
8891         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8892                                    (err, ), errors.ECODE_INVAL)
8893
8894       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8895       if errcode is not None:
8896         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8897                                    errors.ECODE_INVAL)
8898
8899       self.source_x509_ca = cert
8900
8901       src_instance_name = self.op.source_instance_name
8902       if not src_instance_name:
8903         raise errors.OpPrereqError("Missing source instance name",
8904                                    errors.ECODE_INVAL)
8905
8906       self.source_instance_name = \
8907           netutils.GetHostname(name=src_instance_name).name
8908
8909     else:
8910       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8911                                  self.op.mode, errors.ECODE_INVAL)
8912
8913   def ExpandNames(self):
8914     """ExpandNames for CreateInstance.
8915
8916     Figure out the right locks for instance creation.
8917
8918     """
8919     self.needed_locks = {}
8920
8921     instance_name = self.op.instance_name
8922     # this is just a preventive check, but someone might still add this
8923     # instance in the meantime, and creation will fail at lock-add time
8924     if instance_name in self.cfg.GetInstanceList():
8925       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8926                                  instance_name, errors.ECODE_EXISTS)
8927
8928     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8929
8930     if self.op.iallocator:
8931       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8932       # specifying a group on instance creation and then selecting nodes from
8933       # that group
8934       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8935       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8936     else:
8937       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8938       nodelist = [self.op.pnode]
8939       if self.op.snode is not None:
8940         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8941         nodelist.append(self.op.snode)
8942       self.needed_locks[locking.LEVEL_NODE] = nodelist
8943       # Lock resources of instance's primary and secondary nodes (copy to
8944       # prevent accidential modification)
8945       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8946
8947     # in case of import lock the source node too
8948     if self.op.mode == constants.INSTANCE_IMPORT:
8949       src_node = self.op.src_node
8950       src_path = self.op.src_path
8951
8952       if src_path is None:
8953         self.op.src_path = src_path = self.op.instance_name
8954
8955       if src_node is None:
8956         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8957         self.op.src_node = None
8958         if os.path.isabs(src_path):
8959           raise errors.OpPrereqError("Importing an instance from a path"
8960                                      " requires a source node option",
8961                                      errors.ECODE_INVAL)
8962       else:
8963         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8964         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8965           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8966         if not os.path.isabs(src_path):
8967           self.op.src_path = src_path = \
8968             utils.PathJoin(constants.EXPORT_DIR, src_path)
8969
8970   def _RunAllocator(self):
8971     """Run the allocator based on input opcode.
8972
8973     """
8974     nics = [n.ToDict() for n in self.nics]
8975     ial = IAllocator(self.cfg, self.rpc,
8976                      mode=constants.IALLOCATOR_MODE_ALLOC,
8977                      name=self.op.instance_name,
8978                      disk_template=self.op.disk_template,
8979                      tags=self.op.tags,
8980                      os=self.op.os_type,
8981                      vcpus=self.be_full[constants.BE_VCPUS],
8982                      memory=self.be_full[constants.BE_MAXMEM],
8983                      disks=self.disks,
8984                      nics=nics,
8985                      hypervisor=self.op.hypervisor,
8986                      )
8987
8988     ial.Run(self.op.iallocator)
8989
8990     if not ial.success:
8991       raise errors.OpPrereqError("Can't compute nodes using"
8992                                  " iallocator '%s': %s" %
8993                                  (self.op.iallocator, ial.info),
8994                                  errors.ECODE_NORES)
8995     if len(ial.result) != ial.required_nodes:
8996       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8997                                  " of nodes (%s), required %s" %
8998                                  (self.op.iallocator, len(ial.result),
8999                                   ial.required_nodes), errors.ECODE_FAULT)
9000     self.op.pnode = ial.result[0]
9001     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9002                  self.op.instance_name, self.op.iallocator,
9003                  utils.CommaJoin(ial.result))
9004     if ial.required_nodes == 2:
9005       self.op.snode = ial.result[1]
9006
9007   def BuildHooksEnv(self):
9008     """Build hooks env.
9009
9010     This runs on master, primary and secondary nodes of the instance.
9011
9012     """
9013     env = {
9014       "ADD_MODE": self.op.mode,
9015       }
9016     if self.op.mode == constants.INSTANCE_IMPORT:
9017       env["SRC_NODE"] = self.op.src_node
9018       env["SRC_PATH"] = self.op.src_path
9019       env["SRC_IMAGES"] = self.src_images
9020
9021     env.update(_BuildInstanceHookEnv(
9022       name=self.op.instance_name,
9023       primary_node=self.op.pnode,
9024       secondary_nodes=self.secondaries,
9025       status=self.op.start,
9026       os_type=self.op.os_type,
9027       minmem=self.be_full[constants.BE_MINMEM],
9028       maxmem=self.be_full[constants.BE_MAXMEM],
9029       vcpus=self.be_full[constants.BE_VCPUS],
9030       nics=_NICListToTuple(self, self.nics),
9031       disk_template=self.op.disk_template,
9032       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9033              for d in self.disks],
9034       bep=self.be_full,
9035       hvp=self.hv_full,
9036       hypervisor_name=self.op.hypervisor,
9037       tags=self.op.tags,
9038     ))
9039
9040     return env
9041
9042   def BuildHooksNodes(self):
9043     """Build hooks nodes.
9044
9045     """
9046     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9047     return nl, nl
9048
9049   def _ReadExportInfo(self):
9050     """Reads the export information from disk.
9051
9052     It will override the opcode source node and path with the actual
9053     information, if these two were not specified before.
9054
9055     @return: the export information
9056
9057     """
9058     assert self.op.mode == constants.INSTANCE_IMPORT
9059
9060     src_node = self.op.src_node
9061     src_path = self.op.src_path
9062
9063     if src_node is None:
9064       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9065       exp_list = self.rpc.call_export_list(locked_nodes)
9066       found = False
9067       for node in exp_list:
9068         if exp_list[node].fail_msg:
9069           continue
9070         if src_path in exp_list[node].payload:
9071           found = True
9072           self.op.src_node = src_node = node
9073           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9074                                                        src_path)
9075           break
9076       if not found:
9077         raise errors.OpPrereqError("No export found for relative path %s" %
9078                                     src_path, errors.ECODE_INVAL)
9079
9080     _CheckNodeOnline(self, src_node)
9081     result = self.rpc.call_export_info(src_node, src_path)
9082     result.Raise("No export or invalid export found in dir %s" % src_path)
9083
9084     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9085     if not export_info.has_section(constants.INISECT_EXP):
9086       raise errors.ProgrammerError("Corrupted export config",
9087                                    errors.ECODE_ENVIRON)
9088
9089     ei_version = export_info.get(constants.INISECT_EXP, "version")
9090     if (int(ei_version) != constants.EXPORT_VERSION):
9091       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9092                                  (ei_version, constants.EXPORT_VERSION),
9093                                  errors.ECODE_ENVIRON)
9094     return export_info
9095
9096   def _ReadExportParams(self, einfo):
9097     """Use export parameters as defaults.
9098
9099     In case the opcode doesn't specify (as in override) some instance
9100     parameters, then try to use them from the export information, if
9101     that declares them.
9102
9103     """
9104     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9105
9106     if self.op.disk_template is None:
9107       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9108         self.op.disk_template = einfo.get(constants.INISECT_INS,
9109                                           "disk_template")
9110         if self.op.disk_template not in constants.DISK_TEMPLATES:
9111           raise errors.OpPrereqError("Disk template specified in configuration"
9112                                      " file is not one of the allowed values:"
9113                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9114       else:
9115         raise errors.OpPrereqError("No disk template specified and the export"
9116                                    " is missing the disk_template information",
9117                                    errors.ECODE_INVAL)
9118
9119     if not self.op.disks:
9120       disks = []
9121       # TODO: import the disk iv_name too
9122       for idx in range(constants.MAX_DISKS):
9123         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9124           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9125           disks.append({constants.IDISK_SIZE: disk_sz})
9126       self.op.disks = disks
9127       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9128         raise errors.OpPrereqError("No disk info specified and the export"
9129                                    " is missing the disk information",
9130                                    errors.ECODE_INVAL)
9131
9132     if not self.op.nics:
9133       nics = []
9134       for idx in range(constants.MAX_NICS):
9135         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9136           ndict = {}
9137           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9138             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9139             ndict[name] = v
9140           nics.append(ndict)
9141         else:
9142           break
9143       self.op.nics = nics
9144
9145     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9146       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9147
9148     if (self.op.hypervisor is None and
9149         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9150       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9151
9152     if einfo.has_section(constants.INISECT_HYP):
9153       # use the export parameters but do not override the ones
9154       # specified by the user
9155       for name, value in einfo.items(constants.INISECT_HYP):
9156         if name not in self.op.hvparams:
9157           self.op.hvparams[name] = value
9158
9159     if einfo.has_section(constants.INISECT_BEP):
9160       # use the parameters, without overriding
9161       for name, value in einfo.items(constants.INISECT_BEP):
9162         if name not in self.op.beparams:
9163           self.op.beparams[name] = value
9164         # Compatibility for the old "memory" be param
9165         if name == constants.BE_MEMORY:
9166           if constants.BE_MAXMEM not in self.op.beparams:
9167             self.op.beparams[constants.BE_MAXMEM] = value
9168           if constants.BE_MINMEM not in self.op.beparams:
9169             self.op.beparams[constants.BE_MINMEM] = value
9170     else:
9171       # try to read the parameters old style, from the main section
9172       for name in constants.BES_PARAMETERS:
9173         if (name not in self.op.beparams and
9174             einfo.has_option(constants.INISECT_INS, name)):
9175           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9176
9177     if einfo.has_section(constants.INISECT_OSP):
9178       # use the parameters, without overriding
9179       for name, value in einfo.items(constants.INISECT_OSP):
9180         if name not in self.op.osparams:
9181           self.op.osparams[name] = value
9182
9183   def _RevertToDefaults(self, cluster):
9184     """Revert the instance parameters to the default values.
9185
9186     """
9187     # hvparams
9188     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9189     for name in self.op.hvparams.keys():
9190       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9191         del self.op.hvparams[name]
9192     # beparams
9193     be_defs = cluster.SimpleFillBE({})
9194     for name in self.op.beparams.keys():
9195       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9196         del self.op.beparams[name]
9197     # nic params
9198     nic_defs = cluster.SimpleFillNIC({})
9199     for nic in self.op.nics:
9200       for name in constants.NICS_PARAMETERS:
9201         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9202           del nic[name]
9203     # osparams
9204     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9205     for name in self.op.osparams.keys():
9206       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9207         del self.op.osparams[name]
9208
9209   def _CalculateFileStorageDir(self):
9210     """Calculate final instance file storage dir.
9211
9212     """
9213     # file storage dir calculation/check
9214     self.instance_file_storage_dir = None
9215     if self.op.disk_template in constants.DTS_FILEBASED:
9216       # build the full file storage dir path
9217       joinargs = []
9218
9219       if self.op.disk_template == constants.DT_SHARED_FILE:
9220         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9221       else:
9222         get_fsd_fn = self.cfg.GetFileStorageDir
9223
9224       cfg_storagedir = get_fsd_fn()
9225       if not cfg_storagedir:
9226         raise errors.OpPrereqError("Cluster file storage dir not defined")
9227       joinargs.append(cfg_storagedir)
9228
9229       if self.op.file_storage_dir is not None:
9230         joinargs.append(self.op.file_storage_dir)
9231
9232       joinargs.append(self.op.instance_name)
9233
9234       # pylint: disable=W0142
9235       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9236
9237   def CheckPrereq(self):
9238     """Check prerequisites.
9239
9240     """
9241     self._CalculateFileStorageDir()
9242
9243     if self.op.mode == constants.INSTANCE_IMPORT:
9244       export_info = self._ReadExportInfo()
9245       self._ReadExportParams(export_info)
9246
9247     if (not self.cfg.GetVGName() and
9248         self.op.disk_template not in constants.DTS_NOT_LVM):
9249       raise errors.OpPrereqError("Cluster does not support lvm-based"
9250                                  " instances", errors.ECODE_STATE)
9251
9252     if (self.op.hypervisor is None or
9253         self.op.hypervisor == constants.VALUE_AUTO):
9254       self.op.hypervisor = self.cfg.GetHypervisorType()
9255
9256     cluster = self.cfg.GetClusterInfo()
9257     enabled_hvs = cluster.enabled_hypervisors
9258     if self.op.hypervisor not in enabled_hvs:
9259       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9260                                  " cluster (%s)" % (self.op.hypervisor,
9261                                   ",".join(enabled_hvs)),
9262                                  errors.ECODE_STATE)
9263
9264     # Check tag validity
9265     for tag in self.op.tags:
9266       objects.TaggableObject.ValidateTag(tag)
9267
9268     # check hypervisor parameter syntax (locally)
9269     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9270     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9271                                       self.op.hvparams)
9272     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9273     hv_type.CheckParameterSyntax(filled_hvp)
9274     self.hv_full = filled_hvp
9275     # check that we don't specify global parameters on an instance
9276     _CheckGlobalHvParams(self.op.hvparams)
9277
9278     # fill and remember the beparams dict
9279     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9280     for param, value in self.op.beparams.iteritems():
9281       if value == constants.VALUE_AUTO:
9282         self.op.beparams[param] = default_beparams[param]
9283     objects.UpgradeBeParams(self.op.beparams)
9284     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9285     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9286
9287     # build os parameters
9288     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9289
9290     # now that hvp/bep are in final format, let's reset to defaults,
9291     # if told to do so
9292     if self.op.identify_defaults:
9293       self._RevertToDefaults(cluster)
9294
9295     # NIC buildup
9296     self.nics = []
9297     for idx, nic in enumerate(self.op.nics):
9298       nic_mode_req = nic.get(constants.INIC_MODE, None)
9299       nic_mode = nic_mode_req
9300       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9301         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9302
9303       # in routed mode, for the first nic, the default ip is 'auto'
9304       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9305         default_ip_mode = constants.VALUE_AUTO
9306       else:
9307         default_ip_mode = constants.VALUE_NONE
9308
9309       # ip validity checks
9310       ip = nic.get(constants.INIC_IP, default_ip_mode)
9311       if ip is None or ip.lower() == constants.VALUE_NONE:
9312         nic_ip = None
9313       elif ip.lower() == constants.VALUE_AUTO:
9314         if not self.op.name_check:
9315           raise errors.OpPrereqError("IP address set to auto but name checks"
9316                                      " have been skipped",
9317                                      errors.ECODE_INVAL)
9318         nic_ip = self.hostname1.ip
9319       else:
9320         if not netutils.IPAddress.IsValid(ip):
9321           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9322                                      errors.ECODE_INVAL)
9323         nic_ip = ip
9324
9325       # TODO: check the ip address for uniqueness
9326       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9327         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9328                                    errors.ECODE_INVAL)
9329
9330       # MAC address verification
9331       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9332       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9333         mac = utils.NormalizeAndValidateMac(mac)
9334
9335         try:
9336           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9337         except errors.ReservationError:
9338           raise errors.OpPrereqError("MAC address %s already in use"
9339                                      " in cluster" % mac,
9340                                      errors.ECODE_NOTUNIQUE)
9341
9342       #  Build nic parameters
9343       link = nic.get(constants.INIC_LINK, None)
9344       if link == constants.VALUE_AUTO:
9345         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9346       nicparams = {}
9347       if nic_mode_req:
9348         nicparams[constants.NIC_MODE] = nic_mode
9349       if link:
9350         nicparams[constants.NIC_LINK] = link
9351
9352       check_params = cluster.SimpleFillNIC(nicparams)
9353       objects.NIC.CheckParameterSyntax(check_params)
9354       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9355
9356     # disk checks/pre-build
9357     default_vg = self.cfg.GetVGName()
9358     self.disks = []
9359     for disk in self.op.disks:
9360       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9361       if mode not in constants.DISK_ACCESS_SET:
9362         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9363                                    mode, errors.ECODE_INVAL)
9364       size = disk.get(constants.IDISK_SIZE, None)
9365       if size is None:
9366         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9367       try:
9368         size = int(size)
9369       except (TypeError, ValueError):
9370         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9371                                    errors.ECODE_INVAL)
9372
9373       data_vg = disk.get(constants.IDISK_VG, default_vg)
9374       new_disk = {
9375         constants.IDISK_SIZE: size,
9376         constants.IDISK_MODE: mode,
9377         constants.IDISK_VG: data_vg,
9378         }
9379       if constants.IDISK_METAVG in disk:
9380         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9381       if constants.IDISK_ADOPT in disk:
9382         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9383       self.disks.append(new_disk)
9384
9385     if self.op.mode == constants.INSTANCE_IMPORT:
9386       disk_images = []
9387       for idx in range(len(self.disks)):
9388         option = "disk%d_dump" % idx
9389         if export_info.has_option(constants.INISECT_INS, option):
9390           # FIXME: are the old os-es, disk sizes, etc. useful?
9391           export_name = export_info.get(constants.INISECT_INS, option)
9392           image = utils.PathJoin(self.op.src_path, export_name)
9393           disk_images.append(image)
9394         else:
9395           disk_images.append(False)
9396
9397       self.src_images = disk_images
9398
9399       old_name = export_info.get(constants.INISECT_INS, "name")
9400       if self.op.instance_name == old_name:
9401         for idx, nic in enumerate(self.nics):
9402           if nic.mac == constants.VALUE_AUTO:
9403             nic_mac_ini = "nic%d_mac" % idx
9404             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9405
9406     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9407
    # ip ping checks (we use the same ip that was resolved in CheckArguments)
9409     if self.op.ip_check:
9410       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9411         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9412                                    (self.check_ip, self.op.instance_name),
9413                                    errors.ECODE_NOTUNIQUE)
9414
9415     #### mac address generation
9416     # By generating here the mac address both the allocator and the hooks get
9417     # the real final mac address rather than the 'auto' or 'generate' value.
9418     # There is a race condition between the generation and the instance object
9419     # creation, which means that we know the mac is valid now, but we're not
9420     # sure it will be when we actually add the instance. If things go bad
9421     # adding the instance will abort because of a duplicate mac, and the
9422     # creation job will fail.
9423     for nic in self.nics:
9424       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9425         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9426
9427     #### allocator run
9428
9429     if self.op.iallocator is not None:
9430       self._RunAllocator()
9431
9432     # Release all unneeded node locks
9433     _ReleaseLocks(self, locking.LEVEL_NODE,
9434                   keep=filter(None, [self.op.pnode, self.op.snode,
9435                                      self.op.src_node]))
9436
9437     #### node related checks
9438
9439     # check primary node
9440     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9441     assert self.pnode is not None, \
9442       "Cannot retrieve locked node %s" % self.op.pnode
9443     if pnode.offline:
9444       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9445                                  pnode.name, errors.ECODE_STATE)
9446     if pnode.drained:
9447       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9448                                  pnode.name, errors.ECODE_STATE)
9449     if not pnode.vm_capable:
9450       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9451                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9452
9453     self.secondaries = []
9454
9455     # mirror node verification
9456     if self.op.disk_template in constants.DTS_INT_MIRROR:
9457       if self.op.snode == pnode.name:
9458         raise errors.OpPrereqError("The secondary node cannot be the"
9459                                    " primary node", errors.ECODE_INVAL)
9460       _CheckNodeOnline(self, self.op.snode)
9461       _CheckNodeNotDrained(self, self.op.snode)
9462       _CheckNodeVmCapable(self, self.op.snode)
9463       self.secondaries.append(self.op.snode)
9464
9465       snode = self.cfg.GetNodeInfo(self.op.snode)
9466       if pnode.group != snode.group:
9467         self.LogWarning("The primary and secondary nodes are in two"
9468                         " different node groups; the disk parameters"
9469                         " from the first disk's node group will be"
9470                         " used")
9471
9472     nodenames = [pnode.name] + self.secondaries
9473
9474     # disk parameters (not customizable at instance or node level)
9475     # just use the primary node parameters, ignoring the secondary.
9476     self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9477
9478     if not self.adopt_disks:
9479       # Check lv size requirements, if not adopting
9480       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9481       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9482
9483     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9484       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9485                                 disk[constants.IDISK_ADOPT])
9486                      for disk in self.disks])
9487       if len(all_lvs) != len(self.disks):
9488         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9489                                    errors.ECODE_INVAL)
9490       for lv_name in all_lvs:
9491         try:
9492           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9493           # to ReserveLV uses the same syntax
9494           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9495         except errors.ReservationError:
9496           raise errors.OpPrereqError("LV named %s used by another instance" %
9497                                      lv_name, errors.ECODE_NOTUNIQUE)
9498
9499       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9500       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9501
9502       node_lvs = self.rpc.call_lv_list([pnode.name],
9503                                        vg_names.payload.keys())[pnode.name]
9504       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9505       node_lvs = node_lvs.payload
9506
9507       delta = all_lvs.difference(node_lvs.keys())
9508       if delta:
9509         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9510                                    utils.CommaJoin(delta),
9511                                    errors.ECODE_INVAL)
9512       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9513       if online_lvs:
9514         raise errors.OpPrereqError("Online logical volumes found, cannot"
9515                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9516                                    errors.ECODE_STATE)
9517       # update the size of disk based on what is found
9518       for dsk in self.disks:
9519         dsk[constants.IDISK_SIZE] = \
9520           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9521                                         dsk[constants.IDISK_ADOPT])][0]))
9522
9523     elif self.op.disk_template == constants.DT_BLOCK:
9524       # Normalize and de-duplicate device paths
9525       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9526                        for disk in self.disks])
9527       if len(all_disks) != len(self.disks):
9528         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9529                                    errors.ECODE_INVAL)
9530       baddisks = [d for d in all_disks
9531                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9532       if baddisks:
9533         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9534                                    " cannot be adopted" %
9535                                    (", ".join(baddisks),
9536                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9537                                    errors.ECODE_INVAL)
9538
9539       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9540                                             list(all_disks))[pnode.name]
9541       node_disks.Raise("Cannot get block device information from node %s" %
9542                        pnode.name)
9543       node_disks = node_disks.payload
9544       delta = all_disks.difference(node_disks.keys())
9545       if delta:
9546         raise errors.OpPrereqError("Missing block device(s): %s" %
9547                                    utils.CommaJoin(delta),
9548                                    errors.ECODE_INVAL)
9549       for dsk in self.disks:
9550         dsk[constants.IDISK_SIZE] = \
9551           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9552
9553     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9554
9555     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9556     # check OS parameters (remotely)
9557     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9558
9559     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9560
9561     # memory check on primary node
9562     #TODO(dynmem): use MINMEM for checking
9563     if self.op.start:
9564       _CheckNodeFreeMemory(self, self.pnode.name,
9565                            "creating instance %s" % self.op.instance_name,
9566                            self.be_full[constants.BE_MAXMEM],
9567                            self.op.hypervisor)
9568
9569     self.dry_run_result = list(nodenames)
9570
9571   def Exec(self, feedback_fn):
9572     """Create and add the instance to the cluster.
9573
9574     """
9575     instance = self.op.instance_name
9576     pnode_name = self.pnode.name
9577
9578     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9579                 self.owned_locks(locking.LEVEL_NODE)), \
9580       "Node locks differ from node resource locks"
9581
9582     ht_kind = self.op.hypervisor
9583     if ht_kind in constants.HTS_REQ_PORT:
9584       network_port = self.cfg.AllocatePort()
9585     else:
9586       network_port = None
9587
9588     disks = _GenerateDiskTemplate(self,
9589                                   self.op.disk_template,
9590                                   instance, pnode_name,
9591                                   self.secondaries,
9592                                   self.disks,
9593                                   self.instance_file_storage_dir,
9594                                   self.op.file_driver,
9595                                   0,
9596                                   feedback_fn,
9597                                   self.diskparams)
9598
9599     iobj = objects.Instance(name=instance, os=self.op.os_type,
9600                             primary_node=pnode_name,
9601                             nics=self.nics, disks=disks,
9602                             disk_template=self.op.disk_template,
9603                             admin_state=constants.ADMINST_DOWN,
9604                             network_port=network_port,
9605                             beparams=self.op.beparams,
9606                             hvparams=self.op.hvparams,
9607                             hypervisor=self.op.hypervisor,
9608                             osparams=self.op.osparams,
9609                             )
9610
9611     if self.op.tags:
9612       for tag in self.op.tags:
9613         iobj.AddTag(tag)
9614
9615     if self.adopt_disks:
9616       if self.op.disk_template == constants.DT_PLAIN:
9617         # rename LVs to the newly-generated names; we need to construct
9618         # 'fake' LV disks with the old data, plus the new unique_id
9619         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9620         rename_to = []
9621         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9622           rename_to.append(t_dsk.logical_id)
9623           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9624           self.cfg.SetDiskID(t_dsk, pnode_name)
9625         result = self.rpc.call_blockdev_rename(pnode_name,
9626                                                zip(tmp_disks, rename_to))
9627         result.Raise("Failed to rename adoped LVs")
9628     else:
9629       feedback_fn("* creating instance disks...")
9630       try:
9631         _CreateDisks(self, iobj)
9632       except errors.OpExecError:
9633         self.LogWarning("Device creation failed, reverting...")
9634         try:
9635           _RemoveDisks(self, iobj)
9636         finally:
9637           self.cfg.ReleaseDRBDMinors(instance)
9638           raise
9639
9640     feedback_fn("adding instance %s to cluster config" % instance)
9641
9642     self.cfg.AddInstance(iobj, self.proc.GetECId())
9643
9644     # Declare that we don't want to remove the instance lock anymore, as we've
9645     # added the instance to the config
9646     del self.remove_locks[locking.LEVEL_INSTANCE]
9647
9648     if self.op.mode == constants.INSTANCE_IMPORT:
9649       # Release unused nodes
9650       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9651     else:
9652       # Release all nodes
9653       _ReleaseLocks(self, locking.LEVEL_NODE)
9654
9655     disk_abort = False
9656     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9657       feedback_fn("* wiping instance disks...")
9658       try:
9659         _WipeDisks(self, iobj)
9660       except errors.OpExecError, err:
9661         logging.exception("Wiping disks failed")
9662         self.LogWarning("Wiping instance disks failed (%s)", err)
9663         disk_abort = True
9664
9665     if disk_abort:
9666       # Something is already wrong with the disks, don't do anything else
9667       pass
9668     elif self.op.wait_for_sync:
9669       disk_abort = not _WaitForSync(self, iobj)
9670     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9671       # make sure the disks are not degraded (still sync-ing is ok)
9672       feedback_fn("* checking mirrors status")
9673       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9674     else:
9675       disk_abort = False
9676
9677     if disk_abort:
9678       _RemoveDisks(self, iobj)
9679       self.cfg.RemoveInstance(iobj.name)
9680       # Make sure the instance lock gets removed
9681       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9682       raise errors.OpExecError("There are some degraded disks for"
9683                                " this instance")
9684
9685     # Release all node resource locks
9686     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9687
9688     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9689       if self.op.mode == constants.INSTANCE_CREATE:
9690         if not self.op.no_install:
9691           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9692                         not self.op.wait_for_sync)
9693           if pause_sync:
9694             feedback_fn("* pausing disk sync to install instance OS")
9695             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9696                                                               iobj.disks, True)
9697             for idx, success in enumerate(result.payload):
9698               if not success:
9699                 logging.warn("pause-sync of instance %s for disk %d failed",
9700                              instance, idx)
9701
9702           feedback_fn("* running the instance OS create scripts...")
9703           # FIXME: pass debug option from opcode to backend
9704           os_add_result = \
9705             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9706                                           self.op.debug_level)
9707           if pause_sync:
9708             feedback_fn("* resuming disk sync")
9709             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9710                                                               iobj.disks, False)
9711             for idx, success in enumerate(result.payload):
9712               if not success:
9713                 logging.warn("resume-sync of instance %s for disk %d failed",
9714                              instance, idx)
9715
9716           os_add_result.Raise("Could not add os for instance %s"
9717                               " on node %s" % (instance, pnode_name))
9718
9719       elif self.op.mode == constants.INSTANCE_IMPORT:
9720         feedback_fn("* running the instance OS import scripts...")
9721
9722         transfers = []
9723
9724         for idx, image in enumerate(self.src_images):
9725           if not image:
9726             continue
9727
9728           # FIXME: pass debug option from opcode to backend
9729           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9730                                              constants.IEIO_FILE, (image, ),
9731                                              constants.IEIO_SCRIPT,
9732                                              (iobj.disks[idx], idx),
9733                                              None)
9734           transfers.append(dt)
9735
9736         import_result = \
9737           masterd.instance.TransferInstanceData(self, feedback_fn,
9738                                                 self.op.src_node, pnode_name,
9739                                                 self.pnode.secondary_ip,
9740                                                 iobj, transfers)
9741         if not compat.all(import_result):
9742           self.LogWarning("Some disks for instance %s on node %s were not"
9743                           " imported successfully" % (instance, pnode_name))
9744
9745       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9746         feedback_fn("* preparing remote import...")
9747         # The source cluster will stop the instance before attempting to make a
9748         # connection. In some cases stopping an instance can take a long time,
9749         # hence the shutdown timeout is added to the connection timeout.
9750         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9751                            self.op.source_shutdown_timeout)
9752         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9753
9754         assert iobj.primary_node == self.pnode.name
9755         disk_results = \
9756           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9757                                         self.source_x509_ca,
9758                                         self._cds, timeouts)
9759         if not compat.all(disk_results):
9760           # TODO: Should the instance still be started, even if some disks
9761           # failed to import (valid for local imports, too)?
9762           self.LogWarning("Some disks for instance %s on node %s were not"
9763                           " imported successfully" % (instance, pnode_name))
9764
9765         # Run rename script on newly imported instance
9766         assert iobj.name == instance
9767         feedback_fn("Running rename script for %s" % instance)
9768         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9769                                                    self.source_instance_name,
9770                                                    self.op.debug_level)
9771         if result.fail_msg:
9772           self.LogWarning("Failed to run rename script for %s on node"
9773                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9774
9775       else:
9776         # also checked in the prereq part
9777         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9778                                      % self.op.mode)
9779
9780     assert not self.owned_locks(locking.LEVEL_NODE_RES)
9781
9782     if self.op.start:
9783       iobj.admin_state = constants.ADMINST_UP
9784       self.cfg.Update(iobj, feedback_fn)
9785       logging.info("Starting instance %s on node %s", instance, pnode_name)
9786       feedback_fn("* starting instance...")
9787       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9788                                             False)
9789       result.Raise("Could not start instance")
9790
9791     return list(iobj.all_nodes)
9792
9793
class LUInstanceConsole(NoHooksLU):
  """Return the information needed to reach an instance's console.

  This LU is somewhat special: its result is the description of the
  command line which has to be run on the master node in order to
  connect to the console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set up the lock sharing map and lock the instance.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and its primary
    node is checked via L{_CheckNodeOnline}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @raise errors.OpExecError: if the primary node does not list the
        instance among its running instances

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # Not running: the reported state depends on the configured admin state
    admin_state = inst.admin_state
    if admin_state == constants.ADMINST_UP:
      state = constants.INSTST_ERRORDOWN
    elif admin_state == constants.ADMINST_DOWN:
      state = constants.INSTST_ADMINDOWN
    else:
      state = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
9843
9844
def _GetInstanceConsole(cluster, instance):
  """Build the console description for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object, used to fill the instance parameters
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the console object, converted through its C{ToDict} method

  """
  hv_class = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled parameters are handed over separately rather than being
  # written into the instance, so the defaults never get saved in the
  # instance itself.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)
  cons = hv_class.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity checks on what the hypervisor returned
  assert cons.instance == instance.name
  assert cons.Validate()

  return cons.ToDict()
9864
9865
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is delegated to a L{TLReplaceDisks} tasklet;
  this class declares the locks and provides the hooks data.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/remote node/iallocator combination.

    """
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the initial locks and create the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    # Locking the instance must not have declared any of these levels
    for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES,
                       locking.LEVEL_NODEGROUP):
      assert lock_level not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is None:
      # Node locks get computed in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Compute the locks needed at the given locking level.

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          for node_name in self.cfg.GetNodeGroup(group_uuid).members:
            member_nodes.append(node_name)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks (the very same object, not a copy)
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    inst = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node, primary node and, if given, the new
    remote node) is returned for both elements of the result.

    """
    hook_nodes = [self.cfg.GetMasterNode(), self.replacer.instance.primary_node]

    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the optimistically acquired node group locks, if any,
    before running the generic checks of L{LogicalUnit.CheckPrereq}.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, locked_groups)

    return LogicalUnit.CheckPrereq(self)
9982
9983
9984 class TLReplaceDisks(Tasklet):
9985   """Replaces disks for an instance.
9986
9987   Note: Locking is not within the scope of this class.
9988
9989   """
9990   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9991                disks, delay_iallocator, early_release):
9992     """Initializes this class.
9993
9994     """
9995     Tasklet.__init__(self, lu)
9996
9997     # Parameters
9998     self.instance_name = instance_name
9999     self.mode = mode
10000     self.iallocator_name = iallocator_name
10001     self.remote_node = remote_node
10002     self.disks = disks
10003     self.delay_iallocator = delay_iallocator
10004     self.early_release = early_release
10005
10006     # Runtime data
10007     self.instance = None
10008     self.new_node = None
10009     self.target_node = None
10010     self.other_node = None
10011     self.remote_node_info = None
10012     self.node_secondary_ip = None
10013
10014   @staticmethod
10015   def CheckArguments(mode, remote_node, iallocator):
10016     """Helper function for users of this class.
10017
10018     """
10019     # check for valid parameter combination
10020     if mode == constants.REPLACE_DISK_CHG:
10021       if remote_node is None and iallocator is None:
10022         raise errors.OpPrereqError("When changing the secondary either an"
10023                                    " iallocator script must be used or the"
10024                                    " new node given", errors.ECODE_INVAL)
10025
10026       if remote_node is not None and iallocator is not None:
10027         raise errors.OpPrereqError("Give either the iallocator or the new"
10028                                    " secondary, not both", errors.ECODE_INVAL)
10029
10030     elif remote_node is not None or iallocator is not None:
10031       # Not replacing the secondary
10032       raise errors.OpPrereqError("The iallocator and new node options can"
10033                                  " only be used when changing the"
10034                                  " secondary node", errors.ECODE_INVAL)
10035
10036   @staticmethod
10037   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10038     """Compute a new secondary node using an IAllocator.
10039
10040     """
10041     ial = IAllocator(lu.cfg, lu.rpc,
10042                      mode=constants.IALLOCATOR_MODE_RELOC,
10043                      name=instance_name,
10044                      relocate_from=list(relocate_from))
10045
10046     ial.Run(iallocator_name)
10047
10048     if not ial.success:
10049       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10050                                  " %s" % (iallocator_name, ial.info),
10051                                  errors.ECODE_NORES)
10052
10053     if len(ial.result) != ial.required_nodes:
10054       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10055                                  " of nodes (%s), required %s" %
10056                                  (iallocator_name,
10057                                   len(ial.result), ial.required_nodes),
10058                                  errors.ECODE_FAULT)
10059
10060     remote_node_name = ial.result[0]
10061
10062     lu.LogInfo("Selected new secondary for instance '%s': %s",
10063                instance_name, remote_node_name)
10064
10065     return remote_node_name
10066
10067   def _FindFaultyDisks(self, node_name):
10068     """Wrapper for L{_FindFaultyInstanceDisks}.
10069
10070     """
10071     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10072                                     node_name, True)
10073
10074   def _CheckDisksActivated(self, instance):
10075     """Checks if the instance disks are activated.
10076
10077     @param instance: The instance to check disks
10078     @return: True if they are activated, False otherwise
10079
10080     """
10081     nodes = instance.all_nodes
10082
10083     for idx, dev in enumerate(instance.disks):
10084       for node in nodes:
10085         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10086         self.cfg.SetDiskID(dev, node)
10087
10088         result = self.rpc.call_blockdev_find(node, dev)
10089
10090         if result.offline:
10091           continue
10092         elif result.fail_msg or not result.payload:
10093           return False
10094
10095     return True
10096
10097   def CheckPrereq(self):
10098     """Check prerequisites.
10099
10100     This checks that the instance is in the cluster.
10101
10102     """
10103     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10104     assert instance is not None, \
10105       "Cannot retrieve locked instance %s" % self.instance_name
10106
10107     if instance.disk_template != constants.DT_DRBD8:
10108       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10109                                  " instances", errors.ECODE_INVAL)
10110
10111     if len(instance.secondary_nodes) != 1:
10112       raise errors.OpPrereqError("The instance has a strange layout,"
10113                                  " expected one secondary but found %d" %
10114                                  len(instance.secondary_nodes),
10115                                  errors.ECODE_FAULT)
10116
10117     if not self.delay_iallocator:
10118       self._CheckPrereq2()
10119
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this method fills in the attributes used
    by the execution code: C{remote_node_info}, C{disks}, C{target_node},
    C{other_node}, C{new_node} (only when changing the secondary),
    C{diskparams} and C{node_secondary_ip}. It may also switch on
    C{early_release} and it releases the node, node resource and node group
    locks which are not needed anymore.

    @raise errors.OpPrereqError: if the new node is the current primary or
        secondary node, if disks were specified in a mode which doesn't
        allow it, or if automatic repair is not possible
    @raise errors.ProgrammerError: if the replacement mode is unknown

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new node is either given explicitly or computed by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    # The new node must differ from both nodes currently used by the instance
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # In these two modes the list of disks is computed below, hence it must
    # not be passed in
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: find out which node (if any) has faulty disks and
      # replace the disks on that node
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec reports that no disks need replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        # Changing the secondary: the old secondary is the target node, but
        # only the new node and the primary are checked for being online
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: compute disk parameters
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
    if primary_node_info.group != secondary_node_info.group:
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
                      " different node groups; the disk parameters of the"
                      " primary node's group will be applied.")

    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Nodes involved in the operation; attributes which are still None are
    # left out
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    # NOTE(review): the return value is discarded, so FindDisk presumably
    # raises for an invalid index -- confirm against objects.Instance
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
10260
10261   def Exec(self, feedback_fn):
10262     """Execute disk replacement.
10263
10264     This dispatches the disk replacement to the appropriate handler.
10265
10266     """
10267     if self.delay_iallocator:
10268       self._CheckPrereq2()
10269
10270     if __debug__:
10271       # Verify owned locks before starting operation
10272       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10273       assert set(owned_nodes) == set(self.node_secondary_ip), \
10274           ("Incorrect node locks, owning %s, expected %s" %
10275            (owned_nodes, self.node_secondary_ip.keys()))
10276       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10277               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10278
10279       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10280       assert list(owned_instances) == [self.instance_name], \
10281           "Instance '%s' not locked" % self.instance_name
10282
10283       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10284           "Should not own any node group lock at this point"
10285
10286     if not self.disks:
10287       feedback_fn("No disks need replacement")
10288       return
10289
10290     feedback_fn("Replacing disk(s) %s for %s" %
10291                 (utils.CommaJoin(self.disks), self.instance.name))
10292
10293     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10294
10295     # Activate the instance disks if we're replacing them on a down instance
10296     if activate_disks:
10297       _StartInstanceDisks(self.lu, self.instance, True)
10298
10299     try:
10300       # Should we replace the secondary node?
10301       if self.new_node is not None:
10302         fn = self._ExecDrbd8Secondary
10303       else:
10304         fn = self._ExecDrbd8DiskOnly
10305
10306       result = fn(feedback_fn)
10307     finally:
10308       # Deactivate the instance disks if we're replacing them on a
10309       # down instance
10310       if activate_disks:
10311         _SafeShutdownInstanceDisks(self.lu, self.instance)
10312
10313     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10314
10315     if __debug__:
10316       # Verify owned locks
10317       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10318       nodes = frozenset(self.node_secondary_ip)
10319       assert ((self.early_release and not owned_nodes) or
10320               (not self.early_release and not (set(owned_nodes) - nodes))), \
10321         ("Not owning the correct locks, early_release=%s, owned=%r,"
10322          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10323
10324     return result
10325
10326   def _CheckVolumeGroup(self, nodes):
10327     self.lu.LogInfo("Checking volume groups")
10328
10329     vgname = self.cfg.GetVGName()
10330
10331     # Make sure volume group exists on all involved nodes
10332     results = self.rpc.call_vg_list(nodes)
10333     if not results:
10334       raise errors.OpExecError("Can't list volume groups on the nodes")
10335
10336     for node in nodes:
10337       res = results[node]
10338       res.Raise("Error checking node %s" % node)
10339       if vgname not in res.payload:
10340         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10341                                  (vgname, node))
10342
10343   def _CheckDisksExistence(self, nodes):
10344     # Check disk existence
10345     for idx, dev in enumerate(self.instance.disks):
10346       if idx not in self.disks:
10347         continue
10348
10349       for node in nodes:
10350         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10351         self.cfg.SetDiskID(dev, node)
10352
10353         result = self.rpc.call_blockdev_find(node, dev)
10354
10355         msg = result.fail_msg
10356         if msg or not result.payload:
10357           if not msg:
10358             msg = "disk not found"
10359           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10360                                    (idx, node, msg))
10361
10362   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10363     for idx, dev in enumerate(self.instance.disks):
10364       if idx not in self.disks:
10365         continue
10366
10367       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10368                       (idx, node_name))
10369
10370       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10371                                    ldisk=ldisk):
10372         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10373                                  " replace disks for instance %s" %
10374                                  (node_name, self.instance.name))
10375
10376   def _CreateNewStorage(self, node_name):
10377     """Create new storage on the primary or secondary node.
10378
10379     This is only used for same-node replaces, not for changing the
10380     secondary node, hence we don't want to modify the existing disk.
10381
10382     """
10383     iv_names = {}
10384
10385     for idx, dev in enumerate(self.instance.disks):
10386       if idx not in self.disks:
10387         continue
10388
10389       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10390
10391       self.cfg.SetDiskID(dev, node_name)
10392
10393       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10394       names = _GenerateUniqueNames(self.lu, lv_names)
10395
10396       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10397
10398       vg_data = dev.children[0].logical_id[0]
10399       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10400                              logical_id=(vg_data, names[0]), params=data_p)
10401       vg_meta = dev.children[1].logical_id[0]
10402       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10403                              logical_id=(vg_meta, names[1]), params=meta_p)
10404
10405       new_lvs = [lv_data, lv_meta]
10406       old_lvs = [child.Copy() for child in dev.children]
10407       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10408
10409       # we pass force_create=True to force the LVM creation
10410       for new_lv in new_lvs:
10411         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10412                         _GetInstanceInfoText(self.instance), False)
10413
10414     return iv_names
10415
10416   def _CheckDevices(self, node_name, iv_names):
10417     for name, (dev, _, _) in iv_names.iteritems():
10418       self.cfg.SetDiskID(dev, node_name)
10419
10420       result = self.rpc.call_blockdev_find(node_name, dev)
10421
10422       msg = result.fail_msg
10423       if msg or not result.payload:
10424         if not msg:
10425           msg = "disk not found"
10426         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10427                                  (name, msg))
10428
10429       if result.payload.is_degraded:
10430         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10431
10432   def _RemoveOldStorage(self, node_name, iv_names):
10433     for name, (_, old_lvs, _) in iv_names.iteritems():
10434       self.lu.LogInfo("Remove logical volumes for %s" % name)
10435
10436       for lv in old_lvs:
10437         self.cfg.SetDiskID(lv, node_name)
10438
10439         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10440         if msg:
10441           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10442                              hint="remove unused LVs manually")
10443
10444   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10445     """Replace a disk on the primary or secondary for DRBD 8.
10446
10447     The algorithm for replace is quite complicated:
10448
10449       1. for each disk to be replaced:
10450
10451         1. create new LVs on the target node with unique names
10452         1. detach old LVs from the drbd device
10453         1. rename old LVs to name_replaced.<time_t>
10454         1. rename new LVs to old LVs
10455         1. attach the new LVs (with the old names now) to the drbd device
10456
10457       1. wait for sync across all devices
10458
10459       1. for each modified disk:
10460
10461         1. remove old LVs (which have the name name_replaces.<time_t>)
10462
10463     Failures are not very well handled.
10464
10465     """
10466     steps_total = 6
10467
10468     # Step: check device activation
10469     self.lu.LogStep(1, steps_total, "Check device existence")
10470     self._CheckDisksExistence([self.other_node, self.target_node])
10471     self._CheckVolumeGroup([self.target_node, self.other_node])
10472
10473     # Step: check other node consistency
10474     self.lu.LogStep(2, steps_total, "Check peer consistency")
10475     self._CheckDisksConsistency(self.other_node,
10476                                 self.other_node == self.instance.primary_node,
10477                                 False)
10478
10479     # Step: create new storage
10480     self.lu.LogStep(3, steps_total, "Allocate new storage")
10481     iv_names = self._CreateNewStorage(self.target_node)
10482
10483     # Step: for each lv, detach+rename*2+attach
10484     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10485     for dev, old_lvs, new_lvs in iv_names.itervalues():
10486       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10487
10488       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10489                                                      old_lvs)
10490       result.Raise("Can't detach drbd from local storage on node"
10491                    " %s for device %s" % (self.target_node, dev.iv_name))
10492       #dev.children = []
10493       #cfg.Update(instance)
10494
10495       # ok, we created the new LVs, so now we know we have the needed
10496       # storage; as such, we proceed on the target node to rename
10497       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10498       # using the assumption that logical_id == physical_id (which in
10499       # turn is the unique_id on that node)
10500
10501       # FIXME(iustin): use a better name for the replaced LVs
10502       temp_suffix = int(time.time())
10503       ren_fn = lambda d, suff: (d.physical_id[0],
10504                                 d.physical_id[1] + "_replaced-%s" % suff)
10505
10506       # Build the rename list based on what LVs exist on the node
10507       rename_old_to_new = []
10508       for to_ren in old_lvs:
10509         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10510         if not result.fail_msg and result.payload:
10511           # device exists
10512           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10513
10514       self.lu.LogInfo("Renaming the old LVs on the target node")
10515       result = self.rpc.call_blockdev_rename(self.target_node,
10516                                              rename_old_to_new)
10517       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10518
10519       # Now we rename the new LVs to the old LVs
10520       self.lu.LogInfo("Renaming the new LVs on the target node")
10521       rename_new_to_old = [(new, old.physical_id)
10522                            for old, new in zip(old_lvs, new_lvs)]
10523       result = self.rpc.call_blockdev_rename(self.target_node,
10524                                              rename_new_to_old)
10525       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10526
10527       # Intermediate steps of in memory modifications
10528       for old, new in zip(old_lvs, new_lvs):
10529         new.logical_id = old.logical_id
10530         self.cfg.SetDiskID(new, self.target_node)
10531
10532       # We need to modify old_lvs so that removal later removes the
10533       # right LVs, not the newly added ones; note that old_lvs is a
10534       # copy here
10535       for disk in old_lvs:
10536         disk.logical_id = ren_fn(disk, temp_suffix)
10537         self.cfg.SetDiskID(disk, self.target_node)
10538
10539       # Now that the new lvs have the old name, we can add them to the device
10540       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10541       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10542                                                   new_lvs)
10543       msg = result.fail_msg
10544       if msg:
10545         for new_lv in new_lvs:
10546           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10547                                                new_lv).fail_msg
10548           if msg2:
10549             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10550                                hint=("cleanup manually the unused logical"
10551                                      "volumes"))
10552         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10553
10554     cstep = itertools.count(5)
10555
10556     if self.early_release:
10557       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10558       self._RemoveOldStorage(self.target_node, iv_names)
10559       # TODO: Check if releasing locks early still makes sense
10560       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10561     else:
10562       # Release all resource locks except those used by the instance
10563       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10564                     keep=self.node_secondary_ip.keys())
10565
10566     # Release all node locks while waiting for sync
10567     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10568
10569     # TODO: Can the instance lock be downgraded here? Take the optional disk
10570     # shutdown in the caller into consideration.
10571
10572     # Wait for sync
10573     # This can fail as the old devices are degraded and _WaitForSync
10574     # does a combined result over all disks, so we don't check its return value
10575     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10576     _WaitForSync(self.lu, self.instance)
10577
10578     # Check all devices manually
10579     self._CheckDevices(self.instance.primary_node, iv_names)
10580
10581     # Step: remove old storage
10582     if not self.early_release:
10583       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10584       self._RemoveOldStorage(self.target_node, iv_names)
10585
10586   def _ExecDrbd8Secondary(self, feedback_fn):
10587     """Replace the secondary node for DRBD 8.
10588
10589     The algorithm for replace is quite complicated:
10590       - for all disks of the instance:
10591         - create new LVs on the new node with same names
10592         - shutdown the drbd device on the old secondary
10593         - disconnect the drbd network on the primary
10594         - create the drbd device on the new secondary
10595         - network attach the drbd on the primary, using an artifice:
10596           the drbd code for Attach() will connect to the network if it
10597           finds a device which is connected to the good local disks but
10598           not network enabled
10599       - wait for sync across all devices
10600       - remove all disks from the old secondary
10601
10602     Failures are not very well handled.
10603
10604     """
10605     steps_total = 6
10606
10607     pnode = self.instance.primary_node
10608
10609     # Step: check device activation
10610     self.lu.LogStep(1, steps_total, "Check device existence")
10611     self._CheckDisksExistence([self.instance.primary_node])
10612     self._CheckVolumeGroup([self.instance.primary_node])
10613
10614     # Step: check other node consistency
10615     self.lu.LogStep(2, steps_total, "Check peer consistency")
10616     self._CheckDisksConsistency(self.instance.primary_node, True, True)
10617
10618     # Step: create new storage
10619     self.lu.LogStep(3, steps_total, "Allocate new storage")
10620     for idx, dev in enumerate(self.instance.disks):
10621       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10622                       (self.new_node, idx))
10623       # we pass force_create=True to force LVM creation
10624       for new_lv in dev.children:
10625         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10626                         _GetInstanceInfoText(self.instance), False)
10627
10628     # Step 4: dbrd minors and drbd setups changes
10629     # after this, we must manually remove the drbd minors on both the
10630     # error and the success paths
10631     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10632     minors = self.cfg.AllocateDRBDMinor([self.new_node
10633                                          for dev in self.instance.disks],
10634                                         self.instance.name)
10635     logging.debug("Allocated minors %r", minors)
10636
10637     iv_names = {}
10638     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10639       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10640                       (self.new_node, idx))
10641       # create new devices on new_node; note that we create two IDs:
10642       # one without port, so the drbd will be activated without
10643       # networking information on the new node at this stage, and one
10644       # with network, for the latter activation in step 4
10645       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10646       if self.instance.primary_node == o_node1:
10647         p_minor = o_minor1
10648       else:
10649         assert self.instance.primary_node == o_node2, "Three-node instance?"
10650         p_minor = o_minor2
10651
10652       new_alone_id = (self.instance.primary_node, self.new_node, None,
10653                       p_minor, new_minor, o_secret)
10654       new_net_id = (self.instance.primary_node, self.new_node, o_port,
10655                     p_minor, new_minor, o_secret)
10656
10657       iv_names[idx] = (dev, dev.children, new_net_id)
10658       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10659                     new_net_id)
10660       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10661       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10662                               logical_id=new_alone_id,
10663                               children=dev.children,
10664                               size=dev.size,
10665                               params=drbd_params)
10666       try:
10667         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10668                               _GetInstanceInfoText(self.instance), False)
10669       except errors.GenericError:
10670         self.cfg.ReleaseDRBDMinors(self.instance.name)
10671         raise
10672
10673     # We have new devices, shutdown the drbd on the old secondary
10674     for idx, dev in enumerate(self.instance.disks):
10675       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10676       self.cfg.SetDiskID(dev, self.target_node)
10677       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10678       if msg:
10679         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10680                            "node: %s" % (idx, msg),
10681                            hint=("Please cleanup this device manually as"
10682                                  " soon as possible"))
10683
10684     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10685     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10686                                                self.instance.disks)[pnode]
10687
10688     msg = result.fail_msg
10689     if msg:
10690       # detaches didn't succeed (unlikely)
10691       self.cfg.ReleaseDRBDMinors(self.instance.name)
10692       raise errors.OpExecError("Can't detach the disks from the network on"
10693                                " old node: %s" % (msg,))
10694
10695     # if we managed to detach at least one, we update all the disks of
10696     # the instance to point to the new secondary
10697     self.lu.LogInfo("Updating instance configuration")
10698     for dev, _, new_logical_id in iv_names.itervalues():
10699       dev.logical_id = new_logical_id
10700       self.cfg.SetDiskID(dev, self.instance.primary_node)
10701
10702     self.cfg.Update(self.instance, feedback_fn)
10703
10704     # Release all node locks (the configuration has been updated)
10705     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10706
10707     # and now perform the drbd attach
10708     self.lu.LogInfo("Attaching primary drbds to new secondary"
10709                     " (standalone => connected)")
10710     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10711                                             self.new_node],
10712                                            self.node_secondary_ip,
10713                                            self.instance.disks,
10714                                            self.instance.name,
10715                                            False)
10716     for to_node, to_result in result.items():
10717       msg = to_result.fail_msg
10718       if msg:
10719         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10720                            to_node, msg,
10721                            hint=("please do a gnt-instance info to see the"
10722                                  " status of disks"))
10723
10724     cstep = itertools.count(5)
10725
10726     if self.early_release:
10727       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10728       self._RemoveOldStorage(self.target_node, iv_names)
10729       # TODO: Check if releasing locks early still makes sense
10730       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10731     else:
10732       # Release all resource locks except those used by the instance
10733       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10734                     keep=self.node_secondary_ip.keys())
10735
10736     # TODO: Can the instance lock be downgraded here? Take the optional disk
10737     # shutdown in the caller into consideration.
10738
10739     # Wait for sync
10740     # This can fail as the old devices are degraded and _WaitForSync
10741     # does a combined result over all disks, so we don't check its return value
10742     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10743     _WaitForSync(self.lu, self.instance)
10744
10745     # Check all devices manually
10746     self._CheckDevices(self.instance.primary_node, iv_names)
10747
10748     # Step: remove old storage
10749     if not self.early_release:
10750       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10751       self._RemoveOldStorage(self.target_node, iv_names)
10752
10753
10754 class LURepairNodeStorage(NoHooksLU):
10755   """Repairs the volume group on a node.
10756
10757   """
10758   REQ_BGL = False
10759
10760   def CheckArguments(self):
10761     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10762
10763     storage_type = self.op.storage_type
10764
10765     if (constants.SO_FIX_CONSISTENCY not in
10766         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10767       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10768                                  " repaired" % storage_type,
10769                                  errors.ECODE_INVAL)
10770
10771   def ExpandNames(self):
10772     self.needed_locks = {
10773       locking.LEVEL_NODE: [self.op.node_name],
10774       }
10775
10776   def _CheckFaultyDisks(self, instance, node_name):
10777     """Ensure faulty disks abort the opcode or at least warn."""
10778     try:
10779       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10780                                   node_name, True):
10781         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10782                                    " node '%s'" % (instance.name, node_name),
10783                                    errors.ECODE_STATE)
10784     except errors.OpPrereqError, err:
10785       if self.op.ignore_consistency:
10786         self.proc.LogWarning(str(err.args[0]))
10787       else:
10788         raise
10789
10790   def CheckPrereq(self):
10791     """Check prerequisites.
10792
10793     """
10794     # Check whether any instance on this node has faulty disks
10795     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10796       if inst.admin_state != constants.ADMINST_UP:
10797         continue
10798       check_nodes = set(inst.all_nodes)
10799       check_nodes.discard(self.op.node_name)
10800       for inst_node_name in check_nodes:
10801         self._CheckFaultyDisks(inst, inst_node_name)
10802
10803   def Exec(self, feedback_fn):
10804     feedback_fn("Repairing storage unit '%s' on %s ..." %
10805                 (self.op.name, self.op.node_name))
10806
10807     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10808     result = self.rpc.call_storage_execute(self.op.node_name,
10809                                            self.op.storage_type, st_args,
10810                                            self.op.name,
10811                                            constants.SO_FIX_CONSISTENCY)
10812     result.Raise("Failed to repair storage unit '%s' on %s" %
10813                  (self.op.name, self.op.node_name))
10814
10815
10816 class LUNodeEvacuate(NoHooksLU):
10817   """Evacuates instances off a list of nodes.
10818
10819   """
10820   REQ_BGL = False
10821
10822   _MODE2IALLOCATOR = {
10823     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10824     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10825     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10826     }
10827   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10828   assert (frozenset(_MODE2IALLOCATOR.values()) ==
10829           constants.IALLOCATOR_NEVAC_MODES)
10830
  def CheckArguments(self):
    """Check the iallocator and remote node opcode arguments.

    The check itself is delegated to L{_CheckIAllocatorOrNode}, which is
    handed the names of the "iallocator" and "remote_node" opcode
    attributes.

    """
    # NOTE(review): presumably enforces that only one of the two is given;
    # confirm against the helper
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10833
10834   def ExpandNames(self):
10835     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10836
10837     if self.op.remote_node is not None:
10838       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10839       assert self.op.remote_node
10840
10841       if self.op.remote_node == self.op.node_name:
10842         raise errors.OpPrereqError("Can not use evacuated node as a new"
10843                                    " secondary node", errors.ECODE_INVAL)
10844
10845       if self.op.mode != constants.NODE_EVAC_SEC:
10846         raise errors.OpPrereqError("Without the use of an iallocator only"
10847                                    " secondary instances can be evacuated",
10848                                    errors.ECODE_INVAL)
10849
10850     # Declare locks
10851     self.share_locks = _ShareAll()
10852     self.needed_locks = {
10853       locking.LEVEL_INSTANCE: [],
10854       locking.LEVEL_NODEGROUP: [],
10855       locking.LEVEL_NODE: [],
10856       }
10857
10858     # Determine nodes (via group) optimistically, needs verification once locks
10859     # have been acquired
10860     self.lock_nodes = self._DetermineNodes()
10861
10862   def _DetermineNodes(self):
10863     """Gets the list of nodes to operate on.
10864
10865     """
10866     if self.op.remote_node is None:
10867       # Iallocator will choose any node(s) in the same group
10868       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10869     else:
10870       group_nodes = frozenset([self.op.remote_node])
10871
10872     # Determine nodes to be locked
10873     return set([self.op.node_name]) | group_nodes
10874
10875   def _DetermineInstances(self):
10876     """Builds list of instances to operate on.
10877
10878     """
10879     assert self.op.mode in constants.NODE_EVAC_MODES
10880
10881     if self.op.mode == constants.NODE_EVAC_PRI:
10882       # Primary instances only
10883       inst_fn = _GetNodePrimaryInstances
10884       assert self.op.remote_node is None, \
10885         "Evacuating primary instances requires iallocator"
10886     elif self.op.mode == constants.NODE_EVAC_SEC:
10887       # Secondary instances only
10888       inst_fn = _GetNodeSecondaryInstances
10889     else:
10890       # All instances
10891       assert self.op.mode == constants.NODE_EVAC_ALL
10892       inst_fn = _GetNodeInstances
10893       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10894       # per instance
10895       raise errors.OpPrereqError("Due to an issue with the iallocator"
10896                                  " interface it is not possible to evacuate"
10897                                  " all instances at once; specify explicitly"
10898                                  " whether to evacuate primary or secondary"
10899                                  " instances",
10900                                  errors.ECODE_INVAL)
10901
10902     return inst_fn(self.cfg, self.op.node_name)
10903
10904   def DeclareLocks(self, level):
10905     if level == locking.LEVEL_INSTANCE:
10906       # Lock instances optimistically, needs verification once node and group
10907       # locks have been acquired
10908       self.needed_locks[locking.LEVEL_INSTANCE] = \
10909         set(i.name for i in self._DetermineInstances())
10910
10911     elif level == locking.LEVEL_NODEGROUP:
10912       # Lock node groups for all potential target nodes optimistically, needs
10913       # verification once nodes have been acquired
10914       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10915         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10916
10917     elif level == locking.LEVEL_NODE:
10918       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10919
10920   def CheckPrereq(self):
10921     # Verify locks
10922     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10923     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10924     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10925
10926     need_nodes = self._DetermineNodes()
10927
10928     if not owned_nodes.issuperset(need_nodes):
10929       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10930                                  " locks were acquired, current nodes are"
10931                                  " are '%s', used to be '%s'; retry the"
10932                                  " operation" %
10933                                  (self.op.node_name,
10934                                   utils.CommaJoin(need_nodes),
10935                                   utils.CommaJoin(owned_nodes)),
10936                                  errors.ECODE_STATE)
10937
10938     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10939     if owned_groups != wanted_groups:
10940       raise errors.OpExecError("Node groups changed since locks were acquired,"
10941                                " current groups are '%s', used to be '%s';"
10942                                " retry the operation" %
10943                                (utils.CommaJoin(wanted_groups),
10944                                 utils.CommaJoin(owned_groups)))
10945
10946     # Determine affected instances
10947     self.instances = self._DetermineInstances()
10948     self.instance_names = [i.name for i in self.instances]
10949
10950     if set(self.instance_names) != owned_instances:
10951       raise errors.OpExecError("Instances on node '%s' changed since locks"
10952                                " were acquired, current instances are '%s',"
10953                                " used to be '%s'; retry the operation" %
10954                                (self.op.node_name,
10955                                 utils.CommaJoin(self.instance_names),
10956                                 utils.CommaJoin(owned_instances)))
10957
10958     if self.instance_names:
10959       self.LogInfo("Evacuating instances from node '%s': %s",
10960                    self.op.node_name,
10961                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10962     else:
10963       self.LogInfo("No instances to evacuate from node '%s'",
10964                    self.op.node_name)
10965
10966     if self.op.remote_node is not None:
10967       for i in self.instances:
10968         if i.primary_node == self.op.remote_node:
10969           raise errors.OpPrereqError("Node %s is the primary node of"
10970                                      " instance %s, cannot use it as"
10971                                      " secondary" %
10972                                      (self.op.remote_node, i.name),
10973                                      errors.ECODE_INVAL)
10974
10975   def Exec(self, feedback_fn):
10976     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10977
10978     if not self.instance_names:
10979       # No instances to evacuate
10980       jobs = []
10981
10982     elif self.op.iallocator is not None:
10983       # TODO: Implement relocation to other group
10984       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10985                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10986                        instances=list(self.instance_names))
10987
10988       ial.Run(self.op.iallocator)
10989
10990       if not ial.success:
10991         raise errors.OpPrereqError("Can't compute node evacuation using"
10992                                    " iallocator '%s': %s" %
10993                                    (self.op.iallocator, ial.info),
10994                                    errors.ECODE_NORES)
10995
10996       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10997
10998     elif self.op.remote_node is not None:
10999       assert self.op.mode == constants.NODE_EVAC_SEC
11000       jobs = [
11001         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11002                                         remote_node=self.op.remote_node,
11003                                         disks=[],
11004                                         mode=constants.REPLACE_DISK_CHG,
11005                                         early_release=self.op.early_release)]
11006         for instance_name in self.instance_names
11007         ]
11008
11009     else:
11010       raise errors.ProgrammerError("No iallocator or remote node")
11011
11012     return ResultWithJobs(jobs)
11013
11014
11015 def _SetOpEarlyRelease(early_release, op):
11016   """Sets C{early_release} flag on opcodes if available.
11017
11018   """
11019   try:
11020     op.early_release = early_release
11021   except AttributeError:
11022     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11023
11024   return op
11025
11026
11027 def _NodeEvacDest(use_nodes, group, nodes):
11028   """Returns group or nodes depending on caller's choice.
11029
11030   """
11031   if use_nodes:
11032     return utils.CommaJoin(nodes)
11033   else:
11034     return group
11035
11036
11037 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11038   """Unpacks the result of change-group and node-evacuate iallocator requests.
11039
11040   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11041   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11042
11043   @type lu: L{LogicalUnit}
11044   @param lu: Logical unit instance
11045   @type alloc_result: tuple/list
11046   @param alloc_result: Result from iallocator
11047   @type early_release: bool
11048   @param early_release: Whether to release locks early if possible
11049   @type use_nodes: bool
11050   @param use_nodes: Whether to display node names instead of groups
11051
11052   """
11053   (moved, failed, jobs) = alloc_result
11054
11055   if failed:
11056     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11057                                  for (name, reason) in failed)
11058     lu.LogWarning("Unable to evacuate instances %s", failreason)
11059     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11060
11061   if moved:
11062     lu.LogInfo("Instances to be moved: %s",
11063                utils.CommaJoin("%s (to %s)" %
11064                                (name, _NodeEvacDest(use_nodes, group, nodes))
11065                                for (name, group, nodes) in moved))
11066
11067   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11068               map(opcodes.OpCode.LoadOpCode, ops))
11069           for ops in jobs]
11070
11071
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the instance lock and prepares the node-level locks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    # The node locks are computed in DeclareLocks by _LockInstancesNodes;
    # like the other LUs in this module which use that helper, register the
    # node level for recalculation
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks and the node resource locks.

    @param level: locking level for which locks are about to be acquired

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: instance hook environment extended by the C{DISK} and
      C{AMOUNT} values taken from the opcode

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple containing the same list twice: the master node followed
      by all nodes of the instance

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all of its nodes
    are online, that its disk template can be grown and that the requested
    disk is found. Except for file-based disk templates the free disk space
    on the nodes is checked, too.

    @raise errors.OpPrereqError: if the disk template is not growable

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is grown on all nodes of the instance, first as a dry-run and
    then for real, before the new size is recorded in the configuration.

    @param feedback_fn: function called with progress messages
    @raise errors.OpExecError: if the disk can not be activated

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      # The disk was activated above; shut it down again unless the
      # instance is marked as up
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11204
11205
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Works out the instance names and declares the locks, if any.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if not self.op.instances and self.op.use_locking:
      # Will use acquired locks
      self.wanted_names = None
    else:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names

    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Computes the node locks if locking is used.

    @param level: locking level for which locks are about to be acquired

    """
    if level == locking.LEVEL_NODE and self.op.use_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.
    The instance objects are stored in C{self.wanted_instances}.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    instance_info = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = [compat.snd(entry) for entry in instance_info]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @param node: node to query; nothing is queried if it is empty
    @param instance_name: instance name, used for the error message only
    @param dev: block device to look up
    @return: None for static queries, without a node, for offline nodes or
      if no status was returned; otherwise a tuple of device path, major,
      minor, sync percentage, estimated time, degraded flag and local disk
      status

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is not None:
      return (bdev.dev_path, bdev.major, bdev.minor,
              bdev.sync_percent, bdev.estimated_time,
              bdev.is_degraded, bdev.ldisk_status)

    return None

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    @param instance: instance owning the device
    @param snode: secondary node; replaced by the peer node found in the
      logical ID if the device is a DRBD device
    @param dev: device to describe; children are handled recursively
    @return: dict describing the device

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      children_status = [self._ComputeDiskStatus(instance, snode, child)
                         for child in dev.children]
    else:
      children_status = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @param feedback_fn: feedback function, not used here
    @return: dict mapping each instance name to a dict with its data

    """
    cluster = self.cfg.GetClusterInfo()

    primary_nodes = self.cfg.GetMultiNodeInfo(inst.primary_node
                                              for inst in self.wanted_instances)

    data = {}
    for (instance, (_, pnode)) in zip(self.wanted_instances, primary_nodes):
      if pnode.offline:
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
        run_state = None
      elif self.op.static:
        run_state = None
      else:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        hv_info = info_result.payload

        if hv_info and "state" in hv_info:
          run_state = "up"
        elif instance.admin_state == constants.ADMINST_UP:
          # Should be running, but the node does not report it
          run_state = "down"
        else:
          run_state = instance.admin_state

      disk_status = [self._ComputeDiskStatus(instance, None, disk)
                     for disk in instance.disks]

      data[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disk_status,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return data
11370
11371
11372 class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
11374
11375   """
11376   HPATH = "instance-modify"
11377   HTYPE = constants.HTYPE_INSTANCE
11378   REQ_BGL = False
11379
11380   def CheckArguments(self):
11381     if not (self.op.nics or self.op.disks or self.op.disk_template or
11382             self.op.hvparams or self.op.beparams or self.op.os_name or
11383             self.op.online_inst or self.op.offline_inst):
11384       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11385
11386     if self.op.hvparams:
11387       _CheckGlobalHvParams(self.op.hvparams)
11388
11389     # Disk validation
11390     disk_addremove = 0
11391     for disk_op, disk_dict in self.op.disks:
11392       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11393       if disk_op == constants.DDM_REMOVE:
11394         disk_addremove += 1
11395         continue
11396       elif disk_op == constants.DDM_ADD:
11397         disk_addremove += 1
11398       else:
11399         if not isinstance(disk_op, int):
11400           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11401         if not isinstance(disk_dict, dict):
11402           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11403           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11404
11405       if disk_op == constants.DDM_ADD:
11406         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11407         if mode not in constants.DISK_ACCESS_SET:
11408           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11409                                      errors.ECODE_INVAL)
11410         size = disk_dict.get(constants.IDISK_SIZE, None)
11411         if size is None:
11412           raise errors.OpPrereqError("Required disk parameter size missing",
11413                                      errors.ECODE_INVAL)
11414         try:
11415           size = int(size)
11416         except (TypeError, ValueError), err:
11417           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11418                                      str(err), errors.ECODE_INVAL)
11419         disk_dict[constants.IDISK_SIZE] = size
11420       else:
11421         # modification of disk
11422         if constants.IDISK_SIZE in disk_dict:
11423           raise errors.OpPrereqError("Disk size change not possible, use"
11424                                      " grow-disk", errors.ECODE_INVAL)
11425
11426     if disk_addremove > 1:
11427       raise errors.OpPrereqError("Only one disk add or remove operation"
11428                                  " supported at a time", errors.ECODE_INVAL)
11429
11430     if self.op.disks and self.op.disk_template is not None:
11431       raise errors.OpPrereqError("Disk template conversion and other disk"
11432                                  " changes not supported at the same time",
11433                                  errors.ECODE_INVAL)
11434
11435     if (self.op.disk_template and
11436         self.op.disk_template in constants.DTS_INT_MIRROR and
11437         self.op.remote_node is None):
11438       raise errors.OpPrereqError("Changing the disk template to a mirrored"
11439                                  " one requires specifying a secondary node",
11440                                  errors.ECODE_INVAL)
11441
11442     # NIC validation
11443     nic_addremove = 0
11444     for nic_op, nic_dict in self.op.nics:
11445       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11446       if nic_op == constants.DDM_REMOVE:
11447         nic_addremove += 1
11448         continue
11449       elif nic_op == constants.DDM_ADD:
11450         nic_addremove += 1
11451       else:
11452         if not isinstance(nic_op, int):
11453           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11454         if not isinstance(nic_dict, dict):
11455           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11456           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11457
11458       # nic_dict should be a dict
11459       nic_ip = nic_dict.get(constants.INIC_IP, None)
11460       if nic_ip is not None:
11461         if nic_ip.lower() == constants.VALUE_NONE:
11462           nic_dict[constants.INIC_IP] = None
11463         else:
11464           if not netutils.IPAddress.IsValid(nic_ip):
11465             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11466                                        errors.ECODE_INVAL)
11467
11468       nic_bridge = nic_dict.get("bridge", None)
11469       nic_link = nic_dict.get(constants.INIC_LINK, None)
11470       if nic_bridge and nic_link:
11471         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11472                                    " at the same time", errors.ECODE_INVAL)
11473       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11474         nic_dict["bridge"] = None
11475       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11476         nic_dict[constants.INIC_LINK] = None
11477
11478       if nic_op == constants.DDM_ADD:
11479         nic_mac = nic_dict.get(constants.INIC_MAC, None)
11480         if nic_mac is None:
11481           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11482
11483       if constants.INIC_MAC in nic_dict:
11484         nic_mac = nic_dict[constants.INIC_MAC]
11485         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11486           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11487
11488         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11489           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11490                                      " modifying an existing nic",
11491                                      errors.ECODE_INVAL)
11492
11493     if nic_addremove > 1:
11494       raise errors.OpPrereqError("Only one NIC add or remove operation"
11495                                  " supported at a time", errors.ECODE_INVAL)
11496
11497   def ExpandNames(self):
11498     self._ExpandAndLockInstance()
11499     # Can't even acquire node locks in shared mode as upcoming changes in
11500     # Ganeti 2.6 will start to modify the node object on disk conversion
11501     self.needed_locks[locking.LEVEL_NODE] = []
11502     self.needed_locks[locking.LEVEL_NODE_RES] = []
11503     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11504
11505   def DeclareLocks(self, level):
11506     if level == locking.LEVEL_NODE:
11507       self._LockInstancesNodes()
11508       if self.op.disk_template and self.op.remote_node:
11509         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11510         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11511     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11512       # Copy node locks
11513       self.needed_locks[locking.LEVEL_NODE_RES] = \
11514         self.needed_locks[locking.LEVEL_NODE][:]
11515
11516   def BuildHooksEnv(self):
11517     """Build hooks env.
11518
11519     This runs on the master, primary and secondaries.
11520
11521     """
11522     args = dict()
11523     if constants.BE_MINMEM in self.be_new:
11524       args["minmem"] = self.be_new[constants.BE_MINMEM]
11525     if constants.BE_MAXMEM in self.be_new:
11526       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11527     if constants.BE_VCPUS in self.be_new:
11528       args["vcpus"] = self.be_new[constants.BE_VCPUS]
11529     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11530     # information at all.
11531     if self.op.nics:
11532       args["nics"] = []
11533       nic_override = dict(self.op.nics)
11534       for idx, nic in enumerate(self.instance.nics):
11535         if idx in nic_override:
11536           this_nic_override = nic_override[idx]
11537         else:
11538           this_nic_override = {}
11539         if constants.INIC_IP in this_nic_override:
11540           ip = this_nic_override[constants.INIC_IP]
11541         else:
11542           ip = nic.ip
11543         if constants.INIC_MAC in this_nic_override:
11544           mac = this_nic_override[constants.INIC_MAC]
11545         else:
11546           mac = nic.mac
11547         if idx in self.nic_pnew:
11548           nicparams = self.nic_pnew[idx]
11549         else:
11550           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11551         mode = nicparams[constants.NIC_MODE]
11552         link = nicparams[constants.NIC_LINK]
11553         args["nics"].append((ip, mac, mode, link))
11554       if constants.DDM_ADD in nic_override:
11555         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11556         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11557         nicparams = self.nic_pnew[constants.DDM_ADD]
11558         mode = nicparams[constants.NIC_MODE]
11559         link = nicparams[constants.NIC_LINK]
11560         args["nics"].append((ip, mac, mode, link))
11561       elif constants.DDM_REMOVE in nic_override:
11562         del args["nics"][-1]
11563
11564     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11565     if self.op.disk_template:
11566       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11567
11568     return env
11569
11570   def BuildHooksNodes(self):
11571     """Build hooks nodes.
11572
11573     """
11574     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11575     return (nl, nl)
11576
11577   def CheckPrereq(self):
11578     """Check prerequisites.
11579
11580     This only checks the instance list against the existing names.
11581
11582     """
11583     # checking the new params on the primary/secondary nodes
11584
11585     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11586     cluster = self.cluster = self.cfg.GetClusterInfo()
11587     assert self.instance is not None, \
11588       "Cannot retrieve locked instance %s" % self.op.instance_name
11589     pnode = instance.primary_node
11590     nodelist = list(instance.all_nodes)
11591     pnode_info = self.cfg.GetNodeInfo(pnode)
11592     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11593
11594     # OS change
11595     if self.op.os_name and not self.op.force:
11596       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11597                       self.op.force_variant)
11598       instance_os = self.op.os_name
11599     else:
11600       instance_os = instance.os
11601
11602     if self.op.disk_template:
11603       if instance.disk_template == self.op.disk_template:
11604         raise errors.OpPrereqError("Instance already has disk template %s" %
11605                                    instance.disk_template, errors.ECODE_INVAL)
11606
11607       if (instance.disk_template,
11608           self.op.disk_template) not in self._DISK_CONVERSIONS:
11609         raise errors.OpPrereqError("Unsupported disk template conversion from"
11610                                    " %s to %s" % (instance.disk_template,
11611                                                   self.op.disk_template),
11612                                    errors.ECODE_INVAL)
11613       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11614                           msg="cannot change disk template")
11615       if self.op.disk_template in constants.DTS_INT_MIRROR:
11616         if self.op.remote_node == pnode:
11617           raise errors.OpPrereqError("Given new secondary node %s is the same"
11618                                      " as the primary node of the instance" %
11619                                      self.op.remote_node, errors.ECODE_STATE)
11620         _CheckNodeOnline(self, self.op.remote_node)
11621         _CheckNodeNotDrained(self, self.op.remote_node)
11622         # FIXME: here we assume that the old instance type is DT_PLAIN
11623         assert instance.disk_template == constants.DT_PLAIN
11624         disks = [{constants.IDISK_SIZE: d.size,
11625                   constants.IDISK_VG: d.logical_id[0]}
11626                  for d in instance.disks]
11627         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11628         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11629
11630         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11631         if pnode_info.group != snode_info.group:
11632           self.LogWarning("The primary and secondary nodes are in two"
11633                           " different node groups; the disk parameters"
11634                           " from the first disk's node group will be"
11635                           " used")
11636
11637     # hvparams processing
11638     if self.op.hvparams:
11639       hv_type = instance.hypervisor
11640       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11641       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11642       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11643
11644       # local check
11645       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11646       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11647       self.hv_proposed = self.hv_new = hv_new # the new actual values
11648       self.hv_inst = i_hvdict # the new dict (without defaults)
11649     else:
11650       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11651                                               instance.hvparams)
11652       self.hv_new = self.hv_inst = {}
11653
11654     # beparams processing
11655     if self.op.beparams:
11656       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11657                                    use_none=True)
11658       objects.UpgradeBeParams(i_bedict)
11659       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11660       be_new = cluster.SimpleFillBE(i_bedict)
11661       self.be_proposed = self.be_new = be_new # the new actual values
11662       self.be_inst = i_bedict # the new dict (without defaults)
11663     else:
11664       self.be_new = self.be_inst = {}
11665       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11666     be_old = cluster.FillBE(instance)
11667
11668     # CPU param validation -- checking every time a paramtere is
11669     # changed to cover all cases where either CPU mask or vcpus have
11670     # changed
11671     if (constants.BE_VCPUS in self.be_proposed and
11672         constants.HV_CPU_MASK in self.hv_proposed):
11673       cpu_list = \
11674         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11675       # Verify mask is consistent with number of vCPUs. Can skip this
11676       # test if only 1 entry in the CPU mask, which means same mask
11677       # is applied to all vCPUs.
11678       if (len(cpu_list) > 1 and
11679           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11680         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11681                                    " CPU mask [%s]" %
11682                                    (self.be_proposed[constants.BE_VCPUS],
11683                                     self.hv_proposed[constants.HV_CPU_MASK]),
11684                                    errors.ECODE_INVAL)
11685
11686       # Only perform this test if a new CPU mask is given
11687       if constants.HV_CPU_MASK in self.hv_new:
11688         # Calculate the largest CPU number requested
11689         max_requested_cpu = max(map(max, cpu_list))
11690         # Check that all of the instance's nodes have enough physical CPUs to
11691         # satisfy the requested CPU mask
11692         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11693                                 max_requested_cpu + 1, instance.hypervisor)
11694
11695     # osparams processing
11696     if self.op.osparams:
11697       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11698       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11699       self.os_inst = i_osdict # the new dict (without defaults)
11700     else:
11701       self.os_inst = {}
11702
11703     self.warn = []
11704
11705     #TODO(dynmem): do the appropriate check involving MINMEM
11706     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11707         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11708       mem_check_list = [pnode]
11709       if be_new[constants.BE_AUTO_BALANCE]:
11710         # either we changed auto_balance to yes or it was from before
11711         mem_check_list.extend(instance.secondary_nodes)
11712       instance_info = self.rpc.call_instance_info(pnode, instance.name,
11713                                                   instance.hypervisor)
11714       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11715                                          [instance.hypervisor])
11716       pninfo = nodeinfo[pnode]
11717       msg = pninfo.fail_msg
11718       if msg:
11719         # Assume the primary node is unreachable and go ahead
11720         self.warn.append("Can't get info from primary node %s: %s" %
11721                          (pnode, msg))
11722       else:
11723         (_, _, (pnhvinfo, )) = pninfo.payload
11724         if not isinstance(pnhvinfo.get("memory_free", None), int):
11725           self.warn.append("Node data from primary node %s doesn't contain"
11726                            " free memory information" % pnode)
11727         elif instance_info.fail_msg:
11728           self.warn.append("Can't get instance runtime information: %s" %
11729                           instance_info.fail_msg)
11730         else:
11731           if instance_info.payload:
11732             current_mem = int(instance_info.payload["memory"])
11733           else:
11734             # Assume instance not running
11735             # (there is a slight race condition here, but it's not very
11736             # probable, and we have no other way to check)
11737             # TODO: Describe race condition
11738             current_mem = 0
11739           #TODO(dynmem): do the appropriate check involving MINMEM
11740           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11741                       pnhvinfo["memory_free"])
11742           if miss_mem > 0:
11743             raise errors.OpPrereqError("This change will prevent the instance"
11744                                        " from starting, due to %d MB of memory"
11745                                        " missing on its primary node" %
11746                                        miss_mem,
11747                                        errors.ECODE_NORES)
11748
11749       if be_new[constants.BE_AUTO_BALANCE]:
11750         for node, nres in nodeinfo.items():
11751           if node not in instance.secondary_nodes:
11752             continue
11753           nres.Raise("Can't get info from secondary node %s" % node,
11754                      prereq=True, ecode=errors.ECODE_STATE)
11755           (_, _, (nhvinfo, )) = nres.payload
11756           if not isinstance(nhvinfo.get("memory_free", None), int):
11757             raise errors.OpPrereqError("Secondary node %s didn't return free"
11758                                        " memory information" % node,
11759                                        errors.ECODE_STATE)
11760           #TODO(dynmem): do the appropriate check involving MINMEM
11761           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11762             raise errors.OpPrereqError("This change will prevent the instance"
11763                                        " from failover to its secondary node"
11764                                        " %s, due to not enough memory" % node,
11765                                        errors.ECODE_STATE)
11766
11767     # NIC processing
11768     self.nic_pnew = {}
11769     self.nic_pinst = {}
11770     for nic_op, nic_dict in self.op.nics:
11771       if nic_op == constants.DDM_REMOVE:
11772         if not instance.nics:
11773           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11774                                      errors.ECODE_INVAL)
11775         continue
11776       if nic_op != constants.DDM_ADD:
11777         # an existing nic
11778         if not instance.nics:
11779           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11780                                      " no NICs" % nic_op,
11781                                      errors.ECODE_INVAL)
11782         if nic_op < 0 or nic_op >= len(instance.nics):
11783           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11784                                      " are 0 to %d" %
11785                                      (nic_op, len(instance.nics) - 1),
11786                                      errors.ECODE_INVAL)
11787         old_nic_params = instance.nics[nic_op].nicparams
11788         old_nic_ip = instance.nics[nic_op].ip
11789       else:
11790         old_nic_params = {}
11791         old_nic_ip = None
11792
11793       update_params_dict = dict([(key, nic_dict[key])
11794                                  for key in constants.NICS_PARAMETERS
11795                                  if key in nic_dict])
11796
11797       if "bridge" in nic_dict:
11798         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11799
11800       new_nic_params = _GetUpdatedParams(old_nic_params,
11801                                          update_params_dict)
11802       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11803       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11804       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11805       self.nic_pinst[nic_op] = new_nic_params
11806       self.nic_pnew[nic_op] = new_filled_nic_params
11807       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11808
11809       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11810         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11811         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11812         if msg:
11813           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11814           if self.op.force:
11815             self.warn.append(msg)
11816           else:
11817             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11818       if new_nic_mode == constants.NIC_MODE_ROUTED:
11819         if constants.INIC_IP in nic_dict:
11820           nic_ip = nic_dict[constants.INIC_IP]
11821         else:
11822           nic_ip = old_nic_ip
11823         if nic_ip is None:
11824           raise errors.OpPrereqError("Cannot set the nic ip to None"
11825                                      " on a routed nic", errors.ECODE_INVAL)
11826       if constants.INIC_MAC in nic_dict:
11827         nic_mac = nic_dict[constants.INIC_MAC]
11828         if nic_mac is None:
11829           raise errors.OpPrereqError("Cannot set the nic mac to None",
11830                                      errors.ECODE_INVAL)
11831         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11832           # otherwise generate the mac
11833           nic_dict[constants.INIC_MAC] = \
11834             self.cfg.GenerateMAC(self.proc.GetECId())
11835         else:
11836           # or validate/reserve the current one
11837           try:
11838             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11839           except errors.ReservationError:
11840             raise errors.OpPrereqError("MAC address %s already in use"
11841                                        " in cluster" % nic_mac,
11842                                        errors.ECODE_NOTUNIQUE)
11843
11844     # DISK processing
11845     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11846       raise errors.OpPrereqError("Disk operations not supported for"
11847                                  " diskless instances",
11848                                  errors.ECODE_INVAL)
11849     for disk_op, _ in self.op.disks:
11850       if disk_op == constants.DDM_REMOVE:
11851         if len(instance.disks) == 1:
11852           raise errors.OpPrereqError("Cannot remove the last disk of"
11853                                      " an instance", errors.ECODE_INVAL)
11854         _CheckInstanceState(self, instance, INSTANCE_DOWN,
11855                             msg="cannot remove disks")
11856
11857       if (disk_op == constants.DDM_ADD and
11858           len(instance.disks) >= constants.MAX_DISKS):
11859         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11860                                    " add more" % constants.MAX_DISKS,
11861                                    errors.ECODE_STATE)
11862       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11863         # an existing disk
11864         if disk_op < 0 or disk_op >= len(instance.disks):
11865           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11866                                      " are 0 to %d" %
11867                                      (disk_op, len(instance.disks)),
11868                                      errors.ECODE_INVAL)
11869
11870     # disabling the instance
11871     if self.op.offline_inst:
11872       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11873                           msg="cannot change instance state to offline")
11874
11875     # enabling the instance
11876     if self.op.online_inst:
11877       _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11878                           msg="cannot make instance go online")
11879
11880   def _ConvertPlainToDrbd(self, feedback_fn):
11881     """Converts an instance from plain to drbd.
11882
11883     """
11884     feedback_fn("Converting template to drbd")
11885     instance = self.instance
11886     pnode = instance.primary_node
11887     snode = self.op.remote_node
11888
11889     assert instance.disk_template == constants.DT_PLAIN
11890
11891     # create a fake disk info for _GenerateDiskTemplate
11892     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11893                   constants.IDISK_VG: d.logical_id[0]}
11894                  for d in instance.disks]
11895     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11896                                       instance.name, pnode, [snode],
11897                                       disk_info, None, None, 0, feedback_fn,
11898                                       self.diskparams)
11899     info = _GetInstanceInfoText(instance)
11900     feedback_fn("Creating aditional volumes...")
11901     # first, create the missing data and meta devices
11902     for disk in new_disks:
11903       # unfortunately this is... not too nice
11904       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11905                             info, True)
11906       for child in disk.children:
11907         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11908     # at this stage, all new LVs have been created, we can rename the
11909     # old ones
11910     feedback_fn("Renaming original volumes...")
11911     rename_list = [(o, n.children[0].logical_id)
11912                    for (o, n) in zip(instance.disks, new_disks)]
11913     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11914     result.Raise("Failed to rename original LVs")
11915
11916     feedback_fn("Initializing DRBD devices...")
11917     # all child devices are in place, we can now create the DRBD devices
11918     for disk in new_disks:
11919       for node in [pnode, snode]:
11920         f_create = node == pnode
11921         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11922
11923     # at this point, the instance has been modified
11924     instance.disk_template = constants.DT_DRBD8
11925     instance.disks = new_disks
11926     self.cfg.Update(instance, feedback_fn)
11927
11928     # Release node locks while waiting for sync
11929     _ReleaseLocks(self, locking.LEVEL_NODE)
11930
11931     # disks are created, waiting for sync
11932     disk_abort = not _WaitForSync(self, instance,
11933                                   oneshot=not self.op.wait_for_sync)
11934     if disk_abort:
11935       raise errors.OpExecError("There are some degraded disks for"
11936                                " this instance, please cleanup manually")
11937
11938     # Node resource locks will be released by caller
11939
11940   def _ConvertDrbdToPlain(self, feedback_fn):
11941     """Converts an instance from drbd to plain.
11942
11943     """
11944     instance = self.instance
11945
11946     assert len(instance.secondary_nodes) == 1
11947     assert instance.disk_template == constants.DT_DRBD8
11948
11949     pnode = instance.primary_node
11950     snode = instance.secondary_nodes[0]
11951     feedback_fn("Converting template to plain")
11952
11953     old_disks = instance.disks
11954     new_disks = [d.children[0] for d in old_disks]
11955
11956     # copy over size and mode
11957     for parent, child in zip(old_disks, new_disks):
11958       child.size = parent.size
11959       child.mode = parent.mode
11960
11961     # update instance structure
11962     instance.disks = new_disks
11963     instance.disk_template = constants.DT_PLAIN
11964     self.cfg.Update(instance, feedback_fn)
11965
11966     # Release locks in case removing disks takes a while
11967     _ReleaseLocks(self, locking.LEVEL_NODE)
11968
11969     feedback_fn("Removing volumes on the secondary node...")
11970     for disk in old_disks:
11971       self.cfg.SetDiskID(disk, snode)
11972       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11973       if msg:
11974         self.LogWarning("Could not remove block device %s on node %s,"
11975                         " continuing anyway: %s", disk.iv_name, snode, msg)
11976
11977     feedback_fn("Removing unneeded volumes on the primary node...")
11978     for idx, disk in enumerate(old_disks):
11979       meta = disk.children[1]
11980       self.cfg.SetDiskID(meta, pnode)
11981       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11982       if msg:
11983         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11984                         " continuing anyway: %s", idx, pnode, msg)
11985
11986     # this is a DRBD disk, return its port to the pool
11987     for disk in old_disks:
11988       tcp_port = disk.logical_id[2]
11989       self.cfg.AddTcpUdpPort(tcp_port)
11990
11991     # Node resource locks will be released by caller
11992
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: disk changes, disk template
    conversion, NIC changes, hypervisor/backend parameters, OS name, OS
    parameters and the online/offline state. The instance is written to
    the configuration at the end.

    @param feedback_fn: function used to report warnings and progress
    @return: list of (name, value) tuples describing the applied changes

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Exactly one of "no disk template requested" and "owning node resource
    # locks" must hold
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop, the length equals the index of the removed disk
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            # removal failures are only logged
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          # file-based disks: take driver and directory from the first disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base,
                                         feedback_fn,
                                         self.diskparams)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # creation failures are only logged, the loop continues
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      if __debug__:
        # verify that locks are owned for all nodes involved in the conversion
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # look up the conversion method for (current, requested) template
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # on any failure release the DRBD minors, then re-raise
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        # the NIC object stores the instance-level parameters (nic_pinst);
        # the filled ones (nic_pnew) are only used for reporting below
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing NIC; nic_op is its index
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # online/offline instance
    if self.op.online_inst:
      # bringing an instance online marks it as down
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))
    if self.op.offline_inst:
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
12168
  # Supported disk template conversions: maps a (current template,
  # requested template) pair to the method performing the conversion.
  # The values are the plain functions defined above, hence they are
  # called with the LU passed explicitly as first argument.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
12173
12174
12175 class LUInstanceChangeGroup(LogicalUnit):
12176   HPATH = "instance-change-group"
12177   HTYPE = constants.HTYPE_INSTANCE
12178   REQ_BGL = False
12179
12180   def ExpandNames(self):
12181     self.share_locks = _ShareAll()
12182     self.needed_locks = {
12183       locking.LEVEL_NODEGROUP: [],
12184       locking.LEVEL_NODE: [],
12185       }
12186
12187     self._ExpandAndLockInstance()
12188
12189     if self.op.target_groups:
12190       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12191                                   self.op.target_groups)
12192     else:
12193       self.req_target_uuids = None
12194
12195     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12196
12197   def DeclareLocks(self, level):
12198     if level == locking.LEVEL_NODEGROUP:
12199       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12200
12201       if self.req_target_uuids:
12202         lock_groups = set(self.req_target_uuids)
12203
12204         # Lock all groups used by instance optimistically; this requires going
12205         # via the node before it's locked, requiring verification later on
12206         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12207         lock_groups.update(instance_groups)
12208       else:
12209         # No target groups, need to lock all of them
12210         lock_groups = locking.ALL_SET
12211
12212       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12213
12214     elif level == locking.LEVEL_NODE:
12215       if self.req_target_uuids:
12216         # Lock all nodes used by instances
12217         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12218         self._LockInstancesNodes()
12219
12220         # Lock all nodes in all potential target groups
12221         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12222                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12223         member_nodes = [node_name
12224                         for group in lock_groups
12225                         for node_name in self.cfg.GetNodeGroup(group).members]
12226         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12227       else:
12228         # Lock all nodes as all groups are potential targets
12229         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12230
12231   def CheckPrereq(self):
12232     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12233     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12234     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12235
12236     assert (self.req_target_uuids is None or
12237             owned_groups.issuperset(self.req_target_uuids))
12238     assert owned_instances == set([self.op.instance_name])
12239
12240     # Get instance information
12241     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12242
12243     # Check if node groups for locked instance are still correct
12244     assert owned_nodes.issuperset(self.instance.all_nodes), \
12245       ("Instance %s's nodes changed while we kept the lock" %
12246        self.op.instance_name)
12247
12248     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12249                                            owned_groups)
12250
12251     if self.req_target_uuids:
12252       # User requested specific target groups
12253       self.target_uuids = self.req_target_uuids
12254     else:
12255       # All groups except those used by the instance are potential targets
12256       self.target_uuids = owned_groups - inst_groups
12257
12258     conflicting_groups = self.target_uuids & inst_groups
12259     if conflicting_groups:
12260       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12261                                  " used by the instance '%s'" %
12262                                  (utils.CommaJoin(conflicting_groups),
12263                                   self.op.instance_name),
12264                                  errors.ECODE_INVAL)
12265
12266     if not self.target_uuids:
12267       raise errors.OpPrereqError("There are no possible target groups",
12268                                  errors.ECODE_INVAL)
12269
12270   def BuildHooksEnv(self):
12271     """Build hooks env.
12272
12273     """
12274     assert self.target_uuids
12275
12276     env = {
12277       "TARGET_GROUPS": " ".join(self.target_uuids),
12278       }
12279
12280     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12281
12282     return env
12283
12284   def BuildHooksNodes(self):
12285     """Build hooks nodes.
12286
12287     """
12288     mn = self.cfg.GetMasterNode()
12289     return ([mn], [mn])
12290
12291   def Exec(self, feedback_fn):
12292     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12293
12294     assert instances == [self.op.instance_name], "Instance not locked"
12295
12296     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12297                      instances=instances, target_groups=list(self.target_uuids))
12298
12299     ial.Run(self.op.iallocator)
12300
12301     if not ial.success:
12302       raise errors.OpPrereqError("Can't compute solution for changing group of"
12303                                  " instance '%s' using iallocator '%s': %s" %
12304                                  (self.op.instance_name, self.op.iallocator,
12305                                   ial.info),
12306                                  errors.ECODE_NORES)
12307
12308     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12309
12310     self.LogInfo("Iallocator returned %s job(s) for changing group of"
12311                  " instance '%s'", len(jobs), self.op.instance_name)
12312
12313     return ResultWithJobs(jobs)
12314
12315
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the node locks, for all nodes if none were given.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose query failed are mapped to C{False}
        instead.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)

    exports = {}
    for node in rpcresult:
      node_result = rpcresult[node]
      if node_result.fail_msg:
        exports[node] = False
      else:
        exports[node] = node_result.payload

    return exports
12350
12351
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance named in the opcode.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object, requires its primary node to be online and
    fetches the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: C{None} unless the export mode is remote; for remote exports a
      dict with the keys "handshake", "x509_key_name" and "x509_ca"

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # Nothing needs to be prepared for other export modes
      return None

    salt = utils.GenerateSecret(8)
    pnode = self.instance.primary_node

    feedback_fn("Generating X509 certificate on %s" % pnode)
    result = self.rpc.call_x509_cert_create(pnode,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" % result.node)

    (name, cert_pem) = result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # The key name is returned together with its HMAC and the salt used
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_info = (name, utils.Sha1Hmac(self._cds, name, salt=salt), salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": key_name_info,
      "x509_ca": signed_ca,
      }
12401
12402
12403 class LUBackupExport(LogicalUnit):
12404   """Export an instance to an image in the cluster.
12405
12406   """
12407   HPATH = "instance-export"
12408   HTYPE = constants.HTYPE_INSTANCE
12409   REQ_BGL = False
12410
12411   def CheckArguments(self):
12412     """Check the arguments.
12413
12414     """
12415     self.x509_key_name = self.op.x509_key_name
12416     self.dest_x509_ca_pem = self.op.destination_x509_ca
12417
12418     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12419       if not self.x509_key_name:
12420         raise errors.OpPrereqError("Missing X509 key name for encryption",
12421                                    errors.ECODE_INVAL)
12422
12423       if not self.dest_x509_ca_pem:
12424         raise errors.OpPrereqError("Missing destination X509 CA",
12425                                    errors.ECODE_INVAL)
12426
12427   def ExpandNames(self):
12428     self._ExpandAndLockInstance()
12429
12430     # Lock all nodes for local exports
12431     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12432       # FIXME: lock only instance primary and destination node
12433       #
12434       # Sad but true, for now we have do lock all nodes, as we don't know where
12435       # the previous export might be, and in this LU we search for it and
12436       # remove it from its current node. In the future we could fix this by:
12437       #  - making a tasklet to search (share-lock all), then create the
12438       #    new one, then one to remove, after
12439       #  - removing the removal operation altogether
12440       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12441
12442   def DeclareLocks(self, level):
12443     """Last minute lock declaration."""
12444     # All nodes are locked anyway, so nothing to do here.
12445
12446   def BuildHooksEnv(self):
12447     """Build hooks env.
12448
12449     This will run on the master, primary node and target node.
12450
12451     """
12452     env = {
12453       "EXPORT_MODE": self.op.mode,
12454       "EXPORT_NODE": self.op.target_node,
12455       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12456       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12457       # TODO: Generic function for boolean env variables
12458       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12459       }
12460
12461     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12462
12463     return env
12464
12465   def BuildHooksNodes(self):
12466     """Build hooks nodes.
12467
12468     """
12469     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12470
12471     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12472       nl.append(self.op.target_node)
12473
12474     return (nl, nl)
12475
12476   def CheckPrereq(self):
12477     """Check prerequisites.
12478
12479     This checks that the instance and node names are valid.
12480
12481     """
12482     instance_name = self.op.instance_name
12483
12484     self.instance = self.cfg.GetInstanceInfo(instance_name)
12485     assert self.instance is not None, \
12486           "Cannot retrieve locked instance %s" % self.op.instance_name
12487     _CheckNodeOnline(self, self.instance.primary_node)
12488
12489     if (self.op.remove_instance and
12490         self.instance.admin_state == constants.ADMINST_UP and
12491         not self.op.shutdown):
12492       raise errors.OpPrereqError("Can not remove instance without shutting it"
12493                                  " down before")
12494
12495     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12496       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12497       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12498       assert self.dst_node is not None
12499
12500       _CheckNodeOnline(self, self.dst_node.name)
12501       _CheckNodeNotDrained(self, self.dst_node.name)
12502
12503       self._cds = None
12504       self.dest_disk_info = None
12505       self.dest_x509_ca = None
12506
12507     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12508       self.dst_node = None
12509
12510       if len(self.op.target_node) != len(self.instance.disks):
12511         raise errors.OpPrereqError(("Received destination information for %s"
12512                                     " disks, but instance %s has %s disks") %
12513                                    (len(self.op.target_node), instance_name,
12514                                     len(self.instance.disks)),
12515                                    errors.ECODE_INVAL)
12516
12517       cds = _GetClusterDomainSecret()
12518
12519       # Check X509 key name
12520       try:
12521         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12522       except (TypeError, ValueError), err:
12523         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12524
12525       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12526         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12527                                    errors.ECODE_INVAL)
12528
12529       # Load and verify CA
12530       try:
12531         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12532       except OpenSSL.crypto.Error, err:
12533         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12534                                    (err, ), errors.ECODE_INVAL)
12535
12536       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12537       if errcode is not None:
12538         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12539                                    (msg, ), errors.ECODE_INVAL)
12540
12541       self.dest_x509_ca = cert
12542
12543       # Verify target information
12544       disk_info = []
12545       for idx, disk_data in enumerate(self.op.target_node):
12546         try:
12547           (host, port, magic) = \
12548             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12549         except errors.GenericError, err:
12550           raise errors.OpPrereqError("Target info for disk %s: %s" %
12551                                      (idx, err), errors.ECODE_INVAL)
12552
12553         disk_info.append((host, port, magic))
12554
12555       assert len(disk_info) == len(self.op.target_node)
12556       self.dest_disk_info = disk_info
12557
12558     else:
12559       raise errors.ProgrammerError("Unhandled export mode %r" %
12560                                    self.op.mode)
12561
12562     # instance disk type verification
12563     # TODO: Implement export support for file-based disks
12564     for disk in self.instance.disks:
12565       if disk.dev_type == constants.LD_FILE:
12566         raise errors.OpPrereqError("Export not supported for instances with"
12567                                    " file-based disks", errors.ECODE_INVAL)
12568
12569   def _CleanupExports(self, feedback_fn):
12570     """Removes exports of current instance from all other nodes.
12571
12572     If an instance in a cluster with nodes A..D was exported to node C, its
12573     exports will be removed from the nodes A, B and D.
12574
12575     """
12576     assert self.op.mode != constants.EXPORT_MODE_REMOTE
12577
12578     nodelist = self.cfg.GetNodeList()
12579     nodelist.remove(self.dst_node.name)
12580
12581     # on one-node clusters nodelist will be empty after the removal
12582     # if we proceed the backup would be removed because OpBackupQuery
12583     # substitutes an empty list with the full cluster node list.
12584     iname = self.instance.name
12585     if nodelist:
12586       feedback_fn("Removing old exports for instance %s" % iname)
12587       exportlist = self.rpc.call_export_list(nodelist)
12588       for node in exportlist:
12589         if exportlist[node].fail_msg:
12590           continue
12591         if iname in exportlist[node].payload:
12592           msg = self.rpc.call_export_remove(node, iname).fail_msg
12593           if msg:
12594             self.LogWarning("Could not remove older export for instance %s"
12595                             " on node %s: %s", iname, node, msg)
12596
12597   def Exec(self, feedback_fn):
12598     """Export an instance to an image in the cluster.
12599
12600     """
12601     assert self.op.mode in constants.EXPORT_MODES
12602
12603     instance = self.instance
12604     src_node = instance.primary_node
12605
12606     if self.op.shutdown:
12607       # shutdown the instance, but not the disks
12608       feedback_fn("Shutting down instance %s" % instance.name)
12609       result = self.rpc.call_instance_shutdown(src_node, instance,
12610                                                self.op.shutdown_timeout)
12611       # TODO: Maybe ignore failures if ignore_remove_failures is set
12612       result.Raise("Could not shutdown instance %s on"
12613                    " node %s" % (instance.name, src_node))
12614
12615     # set the disks ID correctly since call_instance_start needs the
12616     # correct drbd minor to create the symlinks
12617     for disk in instance.disks:
12618       self.cfg.SetDiskID(disk, src_node)
12619
12620     activate_disks = (instance.admin_state != constants.ADMINST_UP)
12621
12622     if activate_disks:
12623       # Activate the instance disks if we'exporting a stopped instance
12624       feedback_fn("Activating disks for %s" % instance.name)
12625       _StartInstanceDisks(self, instance, None)
12626
12627     try:
12628       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12629                                                      instance)
12630
12631       helper.CreateSnapshots()
12632       try:
12633         if (self.op.shutdown and
12634             instance.admin_state == constants.ADMINST_UP and
12635             not self.op.remove_instance):
12636           assert not activate_disks
12637           feedback_fn("Starting instance %s" % instance.name)
12638           result = self.rpc.call_instance_start(src_node,
12639                                                 (instance, None, None), False)
12640           msg = result.fail_msg
12641           if msg:
12642             feedback_fn("Failed to start instance: %s" % msg)
12643             _ShutdownInstanceDisks(self, instance)
12644             raise errors.OpExecError("Could not start instance: %s" % msg)
12645
12646         if self.op.mode == constants.EXPORT_MODE_LOCAL:
12647           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12648         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12649           connect_timeout = constants.RIE_CONNECT_TIMEOUT
12650           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12651
12652           (key_name, _, _) = self.x509_key_name
12653
12654           dest_ca_pem = \
12655             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12656                                             self.dest_x509_ca)
12657
12658           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12659                                                      key_name, dest_ca_pem,
12660                                                      timeouts)
12661       finally:
12662         helper.Cleanup()
12663
12664       # Check for backwards compatibility
12665       assert len(dresults) == len(instance.disks)
12666       assert compat.all(isinstance(i, bool) for i in dresults), \
12667              "Not all results are boolean: %r" % dresults
12668
12669     finally:
12670       if activate_disks:
12671         feedback_fn("Deactivating disks for %s" % instance.name)
12672         _ShutdownInstanceDisks(self, instance)
12673
12674     if not (compat.all(dresults) and fin_resu):
12675       failures = []
12676       if not fin_resu:
12677         failures.append("export finalization")
12678       if not compat.all(dresults):
12679         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12680                                if not dsk)
12681         failures.append("disk export: disk(s) %s" % fdsk)
12682
12683       raise errors.OpExecError("Export failed, errors in %s" %
12684                                utils.CommaJoin(failures))
12685
12686     # At this point, the export was successful, we can cleanup/finish
12687
12688     # Remove instance if requested
12689     if self.op.remove_instance:
12690       feedback_fn("Removing instance %s" % instance.name)
12691       _RemoveInstance(self, feedback_fn, instance,
12692                       self.op.ignore_remove_failures)
12693
12694     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12695       self._CleanupExports(feedback_fn)
12696
12697     return fin_resu, dresults
12698
12699
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its exports and the export of the named
    instance is removed wherever it is found. Failures on individual nodes
    are only reported, they don't abort the operation.

    @param feedback_fn: function used to give a hint if nothing was found
      for a name which could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    name_unresolved = not expanded_name
    if name_unresolved:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)

    found = False
    for node in exportlist:
      node_result = exportlist[node]

      query_err = node_result.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s", node,
                        query_err)
        continue

      if instance_name not in node_result.payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if name_unresolved and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
12745
12746
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generate the UUID of the new group and request a lock for it.

    """
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already. The node, disk and instance policy parameters are verified as
    well; if no disk parameters were given, the cluster's are used.

    @raise errors.OpPrereqError: if the group name is already in use

    """
    group_name = self.op.group_name

    try:
      existing_uuid = self.cfg.LookupNodeGroup(group_name)
    except errors.OpPrereqError:
      # The lookup failing is the expected case here
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if not self.op.diskparams:
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
    else:
      # Every disk template gets an entry, empty if nothing was specified
      for templ in constants.DISK_TEMPLATES:
        templ_params = self.op.diskparams.setdefault(templ, {})
        utils.ForceDictType(templ_params, constants.DISK_DT_TYPES)

    cluster = self.cfg.GetClusterInfo()
    objects.InstancePolicy.CheckParameterSyntax(
      cluster.SimpleFillIPolicy(self.op.ipolicy))

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: a dictionary containing only the "GROUP_NAME" variable

    """
    hooks_env = {}
    hooks_env["GROUP_NAME"] = self.op.group_name
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of lists, each containing only the master node as
      returned by the configuration

    """
    master = self.cfg.GetMasterNode()
    first_list = [master]
    second_list = [master]
    return (first_list, second_list)

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    new_group = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.op.diskparams,
                                  ipolicy=self.op.ipolicy)

    # The UUID was generated in ExpandNames, hence no check is requested
    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12823
12824
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group and node names and declare the initial locks.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODEGROUP] = set([self.group_uuid])
    self.needed_locks[locking.LEVEL_NODE] = self.op.nodes

  def DeclareLocks(self, level):
    """Add the groups currently holding the nodes to the group locks.

    @param level: the locking level currently being declared

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # Try to get all affected nodes' groups without having the group or node
    # lock yet. Needs verification later in the code flow.
    group_locks.update(self.cfg.GetNodeGroupsFromNodes(self.op.nodes))

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the owned group locks still match the nodes' groups and
    that the change does not split instances, unless forced.

    @raise errors.OpExecError: if the nodes changed groups after the locks
      were computed, if the target group can't be retrieved or if instances
      would get split and force was not given

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
    expected_locks = set([self.group_uuid]) | current_groups
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      # No instance gets newly split, nothing to complain about
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    group_uuid = self.group_uuid
    self.cfg.AssignGroupNodes([(node_name, group_uuid)
                               for node_name in self.op.nodes])

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments actually moving a node to another group matter
    moved_nodes = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        moved_nodes[node] = group

    split_after = set()
    split_before = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      groups_before = set()
      groups_after = set()
      for node in [inst.primary_node] + list(inst.secondary_nodes):
        cur_group = node_data[node].group
        groups_before.add(cur_group)
        groups_after.add(moved_nodes.get(node, cur_group))

      # An instance is split if its nodes span more than one group
      if len(groups_before) > 1:
        split_before.add(inst.name)

      if len(groups_after) > 1:
        split_after.add(inst.name)

    return (list(split_after - split_before),
            list(split_before & split_after))
12956
12957
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted groups; no locks are requested.

    @param lu: the logical unit on whose behalf the query is run
    @raise errors.OpPrereqError: if a requested group is neither a known
      UUID nor a known name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()

    name_to_uuid = {}
    for group in self._all_groups.values():
      name_to_uuid[group.name] = group.uuid

    if not self.names:
      # No names given, hence all groups are wanted, ordered by name
      sorted_names = utils.NiceSort(name_to_uuid.keys())
      self.wanted = [name_to_uuid[name] for name in sorted_names]
      return

    # Accept names to be either names or UUIDs.
    known_uuids = frozenset(self._all_groups.keys())
    unknown = []
    self.wanted = []

    for name in self.names:
      if name in known_uuids:
        self.wanted.append(name)
      elif name in name_to_uuid:
        self.wanted.append(name_to_uuid[name])
      else:
        unknown.append(name)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Declare locks; nothing is done here.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query is run
    @return: a L{query.GroupQueryData} object for the wanted groups; the
      group to nodes/instances mappings are C{None} unless requested

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if want_nodes or want_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        node_group = node.group
        if node_group in group_to_nodes:
          group_to_nodes[node_group].append(node.name)
          node_to_group[node.name] = node_group

    if want_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      group_to_instances = dict((uuid, []) for uuid in self.wanted)

      # Instances are assigned to the group of their primary node
      for inst in all_instances.values():
        pnode = inst.primary_node
        if pnode in node_to_group:
          group_to_instances[node_to_group[pnode]].append(inst.name)

      if not want_nodes:
        # Do not pass on node information if it was not requested.
        group_to_nodes = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(self._cluster, wanted_groups,
                                group_to_nodes, group_to_instances)
13035
13036
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All phases are delegated to a L{_GroupQuery} object built from the
  opcode's names and output fields.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Builds the group query helper from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Lets the query helper expand the requested names.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query helper declare locks for the given level.

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.gq.OldStyleQuery(self)
13055
13056
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Verifies that at least one modification was requested.

    @raise errors.OpPrereqError: if all modifiable parameters are C{None}

    """
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolves the group name and requests the lock on that group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the group object and computes the new values for every parameter
    set passed in the opcode; the results are kept in C{self.new_*}
    attributes and only applied in L{Exec}.

    @raise errors.OpExecError: if the group object can't be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Enforce types on the merged dictionary, i.e. on what is actually
      # stored in the group, like it's done for diskparams and ipolicy below.
      # Checking only self.op.ndparams left the stored values unchecked.
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        # Templates not mentioned in the opcode keep their current values
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      g_ipolicy = {}
      for key, value in self.op.ipolicy.iteritems():
        g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
                                           value,
                                           use_none=True)
        utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
      self.new_ipolicy = g_ipolicy
      objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the group name and the requested allocation
        policy

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, both containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    Applies the values computed in L{CheckPrereq} to the group object and
    writes it back to the configuration.

    @return: list of (parameter name, new value as string) pairs; only
        ndparams and diskparams changes are reported

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
13175
13176
class LUGroupRemove(LogicalUnit):
  """Logical unit removing a node group from the configuration.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group name and requests the lock on that group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node and it must not be the only group
    left in the cluster.

    @raise errors.OpPrereqError: if either condition is violated

    """
    # The group has to be empty
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      node_names = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, node_names),
                                 errors.ECODE_STATE)

    # The cluster always needs at least one group
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary containing the name of the group

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, both containing only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
        removing the group

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    # Register the group's lock for removal as well
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13242
13243
class LUGroupRename(LogicalUnit):
  """Logical unit renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group name and requests the lock on that group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a group with the new name exists

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is still available
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clashing_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary containing the old and the new group name

    """
    env = {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple containing the same list twice: the master node followed
        by the other nodes belonging to the group

    """
    master = self.cfg.GetMasterNode()

    # The master is listed first, so drop it from the candidates
    candidates = self.cfg.GetAllNodesInfo()
    candidates.pop(master, None)

    hook_nodes = [master]
    for node in candidates.values():
      if node.group == self.group_uuid:
        hook_nodes.append(node.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new name of the group
    @raise errors.OpExecError: if the group object can't be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
13311
13312
class LUGroupEvacuate(LogicalUnit):
  """Logical unit computing the jobs needed to evacuate a node group.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves all group names and prepares the lock declarations.

    @raise errors.OpPrereqError: if the group to be evacuated is also listed
        as a target group

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.req_target_uuids = []
    if self.op.target_groups:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(name)
                               for name in self.op.target_groups]

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    # All locks are shared; the actual names are filled in by DeclareLocks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Fills in the locks needed at the given locking level.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # The instances are locked optimistically; the set is verified again
      # once the node and group locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # No target groups, need to lock all of them
        wanted_groups = locking.ALL_SET
      else:
        wanted_groups = set([self.group_uuid] + self.req_target_uuids)

        # Also lock, optimistically, all groups used by the instances; this
        # goes via the nodes before they are locked and hence needs to be
        # verified later on
        for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
          wanted_groups.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Additionally lock every member node of all owned groups, i.e. of the
      # group to be evacuated and of the target groups
      locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in locked_groups
      group_members = []
      for guuid in locked_groups:
        for member in self.cfg.GetNodeGroup(guuid).members:
          group_members.append(member)
      self.needed_locks[locking.LEVEL_NODE].extend(group_members)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the optimistically acquired locks still match the
    configuration and determines the target groups.

    @raise errors.OpPrereqError: if no target groups were requested and no
        group other than the evacuated one is owned

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      instance = self.instances[instance_name]
      assert owned_nodes.issuperset(instance.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      candidates = []
      for guuid in owned_groups:
        if guuid != self.group_uuid:
          candidates.append(guuid)
      self.target_uuids = candidates

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the group name and the space-separated UUIDs
        of the target groups

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple containing the same list twice: the master node followed
        by the members of the group to be evacuated

    """
    master = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    members = self.cfg.GetNodeGroup(self.group_uuid).members
    hook_nodes = [master] + members

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Asks the iallocator for an evacuation plan and returns it as jobs.

    @return: a L{ResultWithJobs} object wrapping the computed jobs
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    locked_instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=locked_instances,
                     target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    evac_jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release,
                                    False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(evac_jobs), self.op.group_name)

    return ResultWithJobs(evac_jobs)
13467
13468
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    """Expands the object name and requests the matching lock.

    Node and instance names are expanded and locked; for node groups only
    the UUID is looked up; nothing is done for other kinds.

    """
    kind = self.op.kind

    self.group_uuid = None
    self.needed_locks = {}

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the object the tags operation works on in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not known

    """
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
13505
13506
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands names like the parent class, but with shared locks.

    """
    TagsLU.ExpandNames(self)

    # Reading tags doesn't modify anything, shared locks are sufficient
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @return: list with the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
13524
13525
13526 class LUTagsSearch(NoHooksLU):
13527   """Searches the tags for a given pattern.
13528
13529   """
13530   REQ_BGL = False
13531
13532   def ExpandNames(self):
13533     self.needed_locks = {}
13534
13535   def CheckPrereq(self):
13536     """Check prerequisites.
13537
13538     This checks the pattern passed for validity by compiling it.
13539
13540     """
13541     try:
13542       self.re = re.compile(self.op.pattern)
13543     except re.error, err:
13544       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13545                                  (self.op.pattern, err), errors.ECODE_INVAL)
13546
13547   def Exec(self, feedback_fn):
13548     """Returns the tag list.
13549
13550     """
13551     cfg = self.cfg
13552     tgts = [("/cluster", cfg.GetClusterInfo())]
13553     ilist = cfg.GetAllInstancesInfo().values()
13554     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13555     nlist = cfg.GetAllNodesInfo().values()
13556     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13557     tgts.extend(("/nodegroup/%s" % n.name, n)
13558                 for n in cfg.GetAllNodeGroupsInfo().values())
13559     results = []
13560     for path, target in tgts:
13561       for tag in target.GetTags():
13562         if self.re.search(tag):
13563           results.append((path, tag))
13564     return results
13565
13566
13567 class LUTagsSet(TagsLU):
13568   """Sets a tag on a given object.
13569
13570   """
13571   REQ_BGL = False
13572
13573   def CheckPrereq(self):
13574     """Check prerequisites.
13575
13576     This checks the type and length of the tag name and value.
13577
13578     """
13579     TagsLU.CheckPrereq(self)
13580     for tag in self.op.tags:
13581       objects.TaggableObject.ValidateTag(tag)
13582
13583   def Exec(self, feedback_fn):
13584     """Sets the tag.
13585
13586     """
13587     try:
13588       for tag in self.op.tags:
13589         self.target.AddTag(tag)
13590     except errors.TagError, err:
13591       raise errors.OpExecError("Error while setting tag: %s" % str(err))
13592     self.cfg.Update(self.target, feedback_fn)
13593
13594
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This validates the tags and checks that the target object carries all
    of them.

    @raise errors.OpPrereqError: if some of the tags are not set on the
        target object

    """
    TagsLU.CheckPrereq(self)

    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()

    # Tags which were requested for removal but are not set on the object
    absent = wanted - present
    if absent:
      quoted = ["'%s'" % name for name in sorted(absent)]
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(quoted), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    target = self.target

    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
13627
13628
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node_name, nres) in node_results.items():
        nres.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging
    the iteration.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last = repeat - 1
    for iteration in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (iteration, last))
      self._TestDelay()
13675
13676
13677 class LUTestJqueue(NoHooksLU):
13678   """Utility LU to test some aspects of the job queue.
13679
13680   """
13681   REQ_BGL = False
13682
13683   # Must be lower than default timeout for WaitForJobChange to see whether it
13684   # notices changed jobs
13685   _CLIENT_CONNECT_TIMEOUT = 20.0
13686   _CLIENT_CONFIRM_TIMEOUT = 60.0
13687
13688   @classmethod
13689   def _NotifyUsingSocket(cls, cb, errcls):
13690     """Opens a Unix socket and waits for another program to connect.
13691
13692     @type cb: callable
13693     @param cb: Callback to send socket name to client
13694     @type errcls: class
13695     @param errcls: Exception class to use for errors
13696
13697     """
13698     # Using a temporary directory as there's no easy way to create temporary
13699     # sockets without writing a custom loop around tempfile.mktemp and
13700     # socket.bind
13701     tmpdir = tempfile.mkdtemp()
13702     try:
13703       tmpsock = utils.PathJoin(tmpdir, "sock")
13704
13705       logging.debug("Creating temporary socket at %s", tmpsock)
13706       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13707       try:
13708         sock.bind(tmpsock)
13709         sock.listen(1)
13710
13711         # Send details to client
13712         cb(tmpsock)
13713
13714         # Wait for client to connect before continuing
13715         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13716         try:
13717           (conn, _) = sock.accept()
13718         except socket.error, err:
13719           raise errcls("Client didn't connect in time (%s)" % err)
13720       finally:
13721         sock.close()
13722     finally:
13723       # Remove as soon as client is connected
13724       shutil.rmtree(tmpdir)
13725
13726     # Wait for client to close
13727     try:
13728       try:
13729         # pylint: disable=E1101
13730         # Instance of '_socketobject' has no ... member
13731         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13732         conn.recv(1)
13733       except socket.error, err:
13734         raise errcls("Client failed to confirm notification (%s)" % err)
13735     finally:
13736       conn.close()
13737
13738   def _SendNotification(self, test, arg, sockname):
13739     """Sends a notification to the client.
13740
13741     @type test: string
13742     @param test: Test name
13743     @param arg: Test argument (depends on test)
13744     @type sockname: string
13745     @param sockname: Socket path
13746
13747     """
13748     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13749
13750   def _Notify(self, prereq, test, arg):
13751     """Notifies the client of a test.
13752
13753     @type prereq: bool
13754     @param prereq: Whether this is a prereq-phase test
13755     @type test: string
13756     @param test: Test name
13757     @param arg: Test argument (depends on test)
13758
13759     """
13760     if prereq:
13761       errcls = errors.OpPrereqError
13762     else:
13763       errcls = errors.OpExecError
13764
13765     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13766                                                   test, arg),
13767                                    errcls)
13768
13769   def CheckArguments(self):
13770     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13771     self.expandnames_calls = 0
13772
13773   def ExpandNames(self):
13774     checkargs_calls = getattr(self, "checkargs_calls", 0)
13775     if checkargs_calls < 1:
13776       raise errors.ProgrammerError("CheckArguments was not called")
13777
13778     self.expandnames_calls += 1
13779
13780     if self.op.notify_waitlock:
13781       self._Notify(True, constants.JQT_EXPANDNAMES, None)
13782
13783     self.LogInfo("Expanding names")
13784
13785     # Get lock on master node (just to get a lock, not for a particular reason)
13786     self.needed_locks = {
13787       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13788       }
13789
13790   def Exec(self, feedback_fn):
13791     if self.expandnames_calls < 1:
13792       raise errors.ProgrammerError("ExpandNames was not called")
13793
13794     if self.op.notify_exec:
13795       self._Notify(False, constants.JQT_EXEC, None)
13796
13797     self.LogInfo("Executing")
13798
13799     if self.op.log_messages:
13800       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13801       for idx, msg in enumerate(self.op.log_messages):
13802         self.LogInfo("Sending log message %s", idx + 1)
13803         feedback_fn(constants.JQT_MSGPREFIX + msg)
13804         # Report how many test messages have been sent
13805         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13806
13807     if self.op.fail:
13808       raise errors.OpExecError("Opcode failure was requested")
13809
13810     return True
13811
13812
13813 class IAllocator(object):
13814   """IAllocator framework.
13815
13816   An IAllocator instance has three sets of attributes:
13817     - cfg that is needed to query the cluster
13818     - input data (all members of the _KEYS class attribute are required)
13819     - four buffer attributes (in|out_data|text), that represent the
13820       input (to the external script) in text and data structure format,
13821       and the output from it, again in two formats
13822     - the result variables from the script (success, info, nodes) for
13823       easy usage
13824
13825   """
13826   # pylint: disable=R0902
13827   # lots of instance attributes
13828
13829   def __init__(self, cfg, rpc_runner, mode, **kwargs):
13830     self.cfg = cfg
13831     self.rpc = rpc_runner
13832     # init buffer variables
13833     self.in_text = self.out_text = self.in_data = self.out_data = None
13834     # init all input fields so that pylint is happy
13835     self.mode = mode
13836     self.memory = self.disks = self.disk_template = None
13837     self.os = self.tags = self.nics = self.vcpus = None
13838     self.hypervisor = None
13839     self.relocate_from = None
13840     self.name = None
13841     self.instances = None
13842     self.evac_mode = None
13843     self.target_groups = []
13844     # computed fields
13845     self.required_nodes = None
13846     # init result fields
13847     self.success = self.info = self.result = None
13848
13849     try:
13850       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13851     except KeyError:
13852       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13853                                    " IAllocator" % self.mode)
13854
13855     keyset = [n for (n, _) in keydata]
13856
13857     for key in kwargs:
13858       if key not in keyset:
13859         raise errors.ProgrammerError("Invalid input parameter '%s' to"
13860                                      " IAllocator" % key)
13861       setattr(self, key, kwargs[key])
13862
13863     for key in keyset:
13864       if key not in kwargs:
13865         raise errors.ProgrammerError("Missing input parameter '%s' to"
13866                                      " IAllocator" % key)
13867     self._BuildInputData(compat.partial(fn, self), keydata)
13868
13869   def _ComputeClusterData(self):
13870     """Compute the generic allocator input data.
13871
13872     This is the data that is independent of the actual operation.
13873
13874     """
13875     cfg = self.cfg
13876     cluster_info = cfg.GetClusterInfo()
13877     # cluster data
13878     data = {
13879       "version": constants.IALLOCATOR_VERSION,
13880       "cluster_name": cfg.GetClusterName(),
13881       "cluster_tags": list(cluster_info.GetTags()),
13882       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13883       # we don't have job IDs
13884       }
13885     ninfo = cfg.GetAllNodesInfo()
13886     iinfo = cfg.GetAllInstancesInfo().values()
13887     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13888
13889     # node data
13890     node_list = [n.name for n in ninfo.values() if n.vm_capable]
13891
13892     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13893       hypervisor_name = self.hypervisor
13894     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13895       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13896     else:
13897       hypervisor_name = cluster_info.primary_hypervisor
13898
13899     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13900                                         [hypervisor_name])
13901     node_iinfo = \
13902       self.rpc.call_all_instances_info(node_list,
13903                                        cluster_info.enabled_hypervisors)
13904
13905     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13906
13907     config_ndata = self._ComputeBasicNodeData(ninfo)
13908     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13909                                                  i_list, config_ndata)
13910     assert len(data["nodes"]) == len(ninfo), \
13911         "Incomplete node data computed"
13912
13913     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13914
13915     self.in_data = data
13916
13917   @staticmethod
13918   def _ComputeNodeGroupData(cfg):
13919     """Compute node groups data.
13920
13921     """
13922     ng = dict((guuid, {
13923       "name": gdata.name,
13924       "alloc_policy": gdata.alloc_policy,
13925       })
13926       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13927
13928     return ng
13929
13930   @staticmethod
13931   def _ComputeBasicNodeData(node_cfg):
13932     """Compute global node data.
13933
13934     @rtype: dict
13935     @returns: a dict of name: (node dict, node config)
13936
13937     """
13938     # fill in static (config-based) values
13939     node_results = dict((ninfo.name, {
13940       "tags": list(ninfo.GetTags()),
13941       "primary_ip": ninfo.primary_ip,
13942       "secondary_ip": ninfo.secondary_ip,
13943       "offline": ninfo.offline,
13944       "drained": ninfo.drained,
13945       "master_candidate": ninfo.master_candidate,
13946       "group": ninfo.group,
13947       "master_capable": ninfo.master_capable,
13948       "vm_capable": ninfo.vm_capable,
13949       })
13950       for ninfo in node_cfg.values())
13951
13952     return node_results
13953
13954   @staticmethod
13955   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13956                               node_results):
13957     """Compute global node data.
13958
13959     @param node_results: the basic node structures as filled from the config
13960
13961     """
13962     #TODO(dynmem): compute the right data on MAX and MIN memory
13963     # make a copy of the current dict
13964     node_results = dict(node_results)
13965     for nname, nresult in node_data.items():
13966       assert nname in node_results, "Missing basic data for node %s" % nname
13967       ninfo = node_cfg[nname]
13968
13969       if not (ninfo.offline or ninfo.drained):
13970         nresult.Raise("Can't get data for node %s" % nname)
13971         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13972                                 nname)
13973         remote_info = _MakeLegacyNodeInfo(nresult.payload)
13974
13975         for attr in ["memory_total", "memory_free", "memory_dom0",
13976                      "vg_size", "vg_free", "cpu_total"]:
13977           if attr not in remote_info:
13978             raise errors.OpExecError("Node '%s' didn't return attribute"
13979                                      " '%s'" % (nname, attr))
13980           if not isinstance(remote_info[attr], int):
13981             raise errors.OpExecError("Node '%s' returned invalid value"
13982                                      " for '%s': %s" %
13983                                      (nname, attr, remote_info[attr]))
13984         # compute memory used by primary instances
13985         i_p_mem = i_p_up_mem = 0
13986         for iinfo, beinfo in i_list:
13987           if iinfo.primary_node == nname:
13988             i_p_mem += beinfo[constants.BE_MAXMEM]
13989             if iinfo.name not in node_iinfo[nname].payload:
13990               i_used_mem = 0
13991             else:
13992               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13993             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13994             remote_info["memory_free"] -= max(0, i_mem_diff)
13995
13996             if iinfo.admin_state == constants.ADMINST_UP:
13997               i_p_up_mem += beinfo[constants.BE_MAXMEM]
13998
13999         # compute memory used by instances
14000         pnr_dyn = {
14001           "total_memory": remote_info["memory_total"],
14002           "reserved_memory": remote_info["memory_dom0"],
14003           "free_memory": remote_info["memory_free"],
14004           "total_disk": remote_info["vg_size"],
14005           "free_disk": remote_info["vg_free"],
14006           "total_cpus": remote_info["cpu_total"],
14007           "i_pri_memory": i_p_mem,
14008           "i_pri_up_memory": i_p_up_mem,
14009           }
14010         pnr_dyn.update(node_results[nname])
14011         node_results[nname] = pnr_dyn
14012
14013     return node_results
14014
14015   @staticmethod
14016   def _ComputeInstanceData(cluster_info, i_list):
14017     """Compute global instance data.
14018
14019     """
14020     instance_data = {}
14021     for iinfo, beinfo in i_list:
14022       nic_data = []
14023       for nic in iinfo.nics:
14024         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14025         nic_dict = {
14026           "mac": nic.mac,
14027           "ip": nic.ip,
14028           "mode": filled_params[constants.NIC_MODE],
14029           "link": filled_params[constants.NIC_LINK],
14030           }
14031         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14032           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14033         nic_data.append(nic_dict)
14034       pir = {
14035         "tags": list(iinfo.GetTags()),
14036         "admin_state": iinfo.admin_state,
14037         "vcpus": beinfo[constants.BE_VCPUS],
14038         "memory": beinfo[constants.BE_MAXMEM],
14039         "os": iinfo.os,
14040         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14041         "nics": nic_data,
14042         "disks": [{constants.IDISK_SIZE: dsk.size,
14043                    constants.IDISK_MODE: dsk.mode}
14044                   for dsk in iinfo.disks],
14045         "disk_template": iinfo.disk_template,
14046         "hypervisor": iinfo.hypervisor,
14047         }
14048       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14049                                                  pir["disks"])
14050       instance_data[iinfo.name] = pir
14051
14052     return instance_data
14053
14054   def _AddNewInstance(self):
14055     """Add new instance data to allocator structure.
14056
14057     This in combination with _AllocatorGetClusterData will create the
14058     correct structure needed as input for the allocator.
14059
14060     The checks for the completeness of the opcode must have already been
14061     done.
14062
14063     """
14064     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14065
14066     if self.disk_template in constants.DTS_INT_MIRROR:
14067       self.required_nodes = 2
14068     else:
14069       self.required_nodes = 1
14070
14071     request = {
14072       "name": self.name,
14073       "disk_template": self.disk_template,
14074       "tags": self.tags,
14075       "os": self.os,
14076       "vcpus": self.vcpus,
14077       "memory": self.memory,
14078       "disks": self.disks,
14079       "disk_space_total": disk_space,
14080       "nics": self.nics,
14081       "required_nodes": self.required_nodes,
14082       "hypervisor": self.hypervisor,
14083       }
14084
14085     return request
14086
14087   def _AddRelocateInstance(self):
14088     """Add relocate instance data to allocator structure.
14089
14090     This in combination with _IAllocatorGetClusterData will create the
14091     correct structure needed as input for the allocator.
14092
14093     The checks for the completeness of the opcode must have already been
14094     done.
14095
14096     """
14097     instance = self.cfg.GetInstanceInfo(self.name)
14098     if instance is None:
14099       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14100                                    " IAllocator" % self.name)
14101
14102     if instance.disk_template not in constants.DTS_MIRRORED:
14103       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14104                                  errors.ECODE_INVAL)
14105
14106     if instance.disk_template in constants.DTS_INT_MIRROR and \
14107         len(instance.secondary_nodes) != 1:
14108       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14109                                  errors.ECODE_STATE)
14110
14111     self.required_nodes = 1
14112     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14113     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14114
14115     request = {
14116       "name": self.name,
14117       "disk_space_total": disk_space,
14118       "required_nodes": self.required_nodes,
14119       "relocate_from": self.relocate_from,
14120       }
14121     return request
14122
14123   def _AddNodeEvacuate(self):
14124     """Get data for node-evacuate requests.
14125
14126     """
14127     return {
14128       "instances": self.instances,
14129       "evac_mode": self.evac_mode,
14130       }
14131
14132   def _AddChangeGroup(self):
14133     """Get data for node-evacuate requests.
14134
14135     """
14136     return {
14137       "instances": self.instances,
14138       "target_groups": self.target_groups,
14139       }
14140
14141   def _BuildInputData(self, fn, keydata):
14142     """Build input data structures.
14143
14144     """
14145     self._ComputeClusterData()
14146
14147     request = fn()
14148     request["type"] = self.mode
14149     for keyname, keytype in keydata:
14150       if keyname not in request:
14151         raise errors.ProgrammerError("Request parameter %s is missing" %
14152                                      keyname)
14153       val = request[keyname]
14154       if not keytype(val):
14155         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14156                                      " validation, value %s, expected"
14157                                      " type %s" % (keyname, val, keytype))
14158     self.in_data["request"] = request
14159
14160     self.in_text = serializer.Dump(self.in_data)
14161
  # The values below are check callables: they are called with a value and
  # a false result means that the value is not acceptable (see their use in
  # _BuildInputData and _ValidateResult).

  # A list of strings
  _STRING_LIST = ht.TListOf(ht.TString)
  # A list of lists of dicts whose "OP_ID" is one of the failover, migrate
  # or replace-disks opcode IDs
  # NOTE(review): the meaning of the two boolean TStrictDict arguments is
  # not visible here -- confirm against the ht module
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  # A list of 3-item entries: two non-empty strings followed by a list of
  # non-empty strings
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  # A list of 2-item entries: a non-empty string and a "maybe string"
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  # The complete result of the node-evacuate and change-group modes: the
  # moved entries, the failed entries and the job list
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  # Per-mode data, unpacked in __init__: the method building the request,
  # the list of (input parameter name, check) pairs and the check for the
  # result returned by the allocator
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
14214
14215   def Run(self, name, validate=True, call_fn=None):
14216     """Run an instance allocator and return the results.
14217
14218     """
14219     if call_fn is None:
14220       call_fn = self.rpc.call_iallocator_runner
14221
14222     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14223     result.Raise("Failure while running the iallocator script")
14224
14225     self.out_text = result.payload
14226     if validate:
14227       self._ValidateResult()
14228
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    The text in C{self.out_text} is deserialized and has to be a dict
    containing the keys "success", "info" and "result" (a "nodes" key is
    still accepted in place of "result"); their values are copied to the
    attributes of the same name. The result must pass the result check of
    the current mode. In relocation mode a successful result is
    additionally required to stay within the node groups of the request.

    @raise errors.OpExecError: if the output can't be parsed, is not a
        dict, lacks a key, fails the result check, or relocates to nodes
        in other groups

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatiblity in later versions
    # (the old "nodes" key is renamed to "result" in place)
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    # the attributes are set one by one, so keys found before a missing
    # one have already been stored when the error is raised
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    # NOTE(review): this is the only OpExecError here which is given an
    # error code argument -- confirm that this is intended
    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      # map from node name to group UUID, taken from the data which was
      # sent to the allocator
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      # the primary node is added to both lists, so its group always
      # counts as one of the original groups
      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      # the groups are only compared if the allocator reported success
      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
14285
14286   @staticmethod
14287   def _NodesToGroups(node2group, groups, nodes):
14288     """Returns a list of unique group names for a list of nodes.
14289
14290     @type node2group: dict
14291     @param node2group: Map from node name to group UUID
14292     @type groups: dict
14293     @param groups: Group information
14294     @type nodes: list
14295     @param nodes: Node names
14296
14297     """
14298     result = set()
14299
14300     for node in nodes:
14301       try:
14302         group_uuid = node2group[node]
14303       except KeyError:
14304         # Ignore unknown node
14305         pass
14306       else:
14307         try:
14308           group = groups[group_uuid]
14309         except KeyError:
14310           # Can't find group, let's use UUID
14311           group_name = group_uuid
14312         else:
14313           group_name = group["name"]
14314
14315         result.add(group_name)
14316
14317     return sorted(result)
14318
14319
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested direction
  it returns either the input text built for the allocator or the
  unvalidated output of an allocator run.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter is missing or invalid for
        the requested mode or direction

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must be for a name not yet used in the cluster
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # each disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # the allocator name is only needed if the allocator is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function (not used here)
    @return: the allocator input text for the "in" direction, otherwise
        the unvalidated output text of the allocator run
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # the mode has to be interpolated with "%"; passing it as a second
      # argument would leave the "%s" in the message unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw output is wanted here, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
14423
14424
#: Query type implementations
# (maps each query resource type to its implementation; the lookup is
# done by _GetQueryImplementation)
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# The implemented resource types must be exactly those in
# constants.QR_VIA_OP
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14434
14435
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} registered for C{name}
  @raise errors.OpPrereqError: if there is no implementation for C{name}

  """
  try:
    impl = _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return impl